Search in sources:

Example 51 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class ForkingTaskRunnerTest method testTaskStatusWhenTaskProcessFails.

/**
 * Runs a {@link NoopTask} through a {@link ForkingTaskRunner} whose forked process is stubbed to exit with
 * code 1, and checks that the resulting status is FAILED with the expected error message.
 */
@Test
public void testTaskStatusWhenTaskProcessFails() throws ExecutionException, InterruptedException {
    final ForkingTaskRunnerConfig runnerConfig = new ForkingTaskRunnerConfig();
    final TaskConfig taskConfig = new TaskConfig(null, null, null, null, ImmutableList.of(), false, new Period("PT0S"), new Period("PT10S"), ImmutableList.of(), false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    final DruidNode middleManagerNode = new DruidNode("middleManager", "host", false, 8091, null, true, false);

    final ForkingTaskRunner runner = new ForkingTaskRunner(runnerConfig, taskConfig, new WorkerConfig(), new Properties(), new NoopTaskLogs(), new DefaultObjectMapper(), middleManagerNode, new StartupLoggingConfig()) {

        @Override
        ProcessHolder runTaskProcess(List<String> command, File logFile, TaskLocation taskLocation) {
            // No real process is forked; the runner only ever sees an inert mock holder.
            final ProcessHolder mockedHolder = Mockito.mock(ProcessHolder.class);
            Mockito.doNothing().when(mockedHolder).registerWithCloser(ArgumentMatchers.any());
            Mockito.doNothing().when(mockedHolder).shutdown();
            return mockedHolder;
        }

        @Override
        int waitForTaskProcessToComplete(Task task, ProcessHolder processHolder, File logFile, File reportsFile) {
            // Emulate task process failure
            return 1;
        }
    };

    final TaskStatus status = runner.run(NoopTask.create()).get();
    final String expectedErrorMsg = "Task execution process exited unsuccessfully with code[1]. See middleManager logs for more details.";
    Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
    Assert.assertEquals(expectedErrorMsg, status.getErrorMsg());
}
Also used : NoopTaskLogs(org.apache.druid.tasklogs.NoopTaskLogs) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) Period(org.joda.time.Period) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Properties(java.util.Properties) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) StartupLoggingConfig(org.apache.druid.server.log.StartupLoggingConfig) ForkingTaskRunnerConfig(org.apache.druid.indexing.overlord.config.ForkingTaskRunnerConfig) WorkerConfig(org.apache.druid.indexing.worker.config.WorkerConfig) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) DruidNode(org.apache.druid.server.DruidNode) File(java.io.File) Test(org.junit.Test)

Example 52 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class ForkingTaskRunnerTest method testTaskStatusWhenTaskProcessSucceedsTaskFails.

/**
 * Runs a {@link NoopTask} through a {@link ForkingTaskRunner} whose forked process is stubbed to exit with
 * code 0 after a failure status has been written to the {@code status.json} path found in the command line,
 * and checks that the resulting status is FAILED with the error message taken from that file.
 */
@Test
public void testTaskStatusWhenTaskProcessSucceedsTaskFails() throws ExecutionException, InterruptedException {
    ObjectMapper mapper = new DefaultObjectMapper();
    Task task = NoopTask.create();
    final ForkingTaskRunnerConfig runnerConfig = new ForkingTaskRunnerConfig();
    final TaskConfig taskConfig = new TaskConfig(null, null, null, null, ImmutableList.of(), false, new Period("PT0S"), new Period("PT10S"), ImmutableList.of(), false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    final DruidNode middleManagerNode = new DruidNode("middleManager", "host", false, 8091, null, true, false);

    final ForkingTaskRunner runner = new ForkingTaskRunner(runnerConfig, taskConfig, new WorkerConfig(), new Properties(), new NoopTaskLogs(), mapper, middleManagerNode, new StartupLoggingConfig()) {

        @Override
        ProcessHolder runTaskProcess(List<String> command, File logFile, TaskLocation taskLocation) throws IOException {
            final ProcessHolder mockedHolder = Mockito.mock(ProcessHolder.class);
            Mockito.doNothing().when(mockedHolder).registerWithCloser(ArgumentMatchers.any());
            Mockito.doNothing().when(mockedHolder).shutdown();

            // Locate the first command argument that names the status file, if any.
            String statusFilePath = null;
            for (String argument : command) {
                if (argument.endsWith("status.json")) {
                    statusFilePath = argument;
                    break;
                }
            }
            // Write a failure status there so the runner reads a failed task despite the clean exit code.
            if (statusFilePath != null) {
                mapper.writeValue(new File(statusFilePath), TaskStatus.failure(task.getId(), "task failure test"));
            }
            return mockedHolder;
        }

        @Override
        int waitForTaskProcessToComplete(Task task, ProcessHolder processHolder, File logFile, File reportsFile) {
            // The process itself reports success.
            return 0;
        }
    };

    final TaskStatus status = runner.run(task).get();
    Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
    Assert.assertEquals("task failure test", status.getErrorMsg());
}
Also used : NoopTaskLogs(org.apache.druid.tasklogs.NoopTaskLogs) Task(org.apache.druid.indexing.common.task.Task) NoopTask(org.apache.druid.indexing.common.task.NoopTask) Period(org.joda.time.Period) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Properties(java.util.Properties) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) StartupLoggingConfig(org.apache.druid.server.log.StartupLoggingConfig) ForkingTaskRunnerConfig(org.apache.druid.indexing.overlord.config.ForkingTaskRunnerConfig) WorkerConfig(org.apache.druid.indexing.worker.config.WorkerConfig) ImmutableList(com.google.common.collect.ImmutableList) List(java.util.List) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) DruidNode(org.apache.druid.server.DruidNode) File(java.io.File) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Test(org.junit.Test)

Example 53 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class IndexTaskClient method submitRequest.

/**
 * Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
 *
 * <p>The method loops until it either returns a value or throws. On every attempt it re-reads the task status and
 * location from {@code taskInfoProvider}, builds the request, checks that a connection can be opened and then
 * submits the request. An {@link IOException} or {@link ChannelException} raised during an attempt is retried
 * after a delay only while {@code retry} is true and a non-null delay is available; otherwise it is rethrown.
 *
 * @param taskId              id of the task to contact; URL-encoded into the request path
 * @param mediaType           media type passed to {@code createRequest}; nullable if content is empty
 * @param method              HTTP method passed to {@code createRequest}
 * @param encodedPathSuffix   suffix appended after {@code BASE_PATH} and the encoded task id
 * @param encodedQueryString  query string passed to {@code createRequest}; may be null
 * @param content             request body passed to {@code createRequest}
 * @param responseHandler     handler passed to the inner {@code submitRequest(request, responseHandler)} call
 * @param retry               when false, the first {@link IOException} or {@link ChannelException} is logged at
 *                            INFO and rethrown instead of being retried
 * @return the value held by the response when {@code response.isValue()} is true
 * @throws IOException             rethrown when {@code retry} is false or no further retry delay is available
 * @throws ChannelException        rethrown under the same conditions as {@link IOException}
 * @throws NoTaskLocationException when the task location equals {@code TaskLocation.unknown()}
 */
protected <IntermediateType, FinalType> FinalType submitRequest(String taskId, // mediaType (next parameter) is nullable if content is empty
@Nullable String mediaType, HttpMethod method, String encodedPathSuffix, @Nullable String encodedQueryString, byte[] content, HttpResponseHandler<IntermediateType, FinalType> responseHandler, boolean retry) throws IOException, ChannelException, NoTaskLocationException {
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    // Each iteration is one attempt; the loop is only left by returning a value or by throwing.
    while (true) {
        String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
        // The status is re-read on every attempt; a missing or non-runnable status aborts the request.
        Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
        if (!status.isPresent() || !status.get().isRunnable()) {
            throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
        }
        // The location is looked up per attempt as well, so a changed location is picked up on retry.
        final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
        if (location.equals(TaskLocation.unknown())) {
            throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
        }
        final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
        // Stays null when the attempt fails before a response is obtained; the IOException/ChannelException
        // handler below checks for that before inspecting the response.
        Either<StringFullResponseHolder, FinalType> response = null;
        try {
            // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
            // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
            checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
            response = submitRequest(request, responseHandler);
            if (response.isValue()) {
                return response.valueOrThrow();
            } else {
                // Error response: build a message from the status and a truncated copy of the body, then throw.
                final StringBuilder exceptionMessage = new StringBuilder();
                final HttpResponseStatus httpResponseStatus = response.error().getStatus();
                final String httpResponseContent = response.error().getContent();
                exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
                if (!Strings.isNullOrEmpty(httpResponseContent)) {
                    final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
                    exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
                }
                if (httpResponseStatus.getCode() == 400) {
                    // don't bother retrying if it's a bad request
                    throw new IAE(exceptionMessage.toString());
                } else {
                    // NOTE(review): IOE is presumably an IOException subtype, which would route it into the
                    // retry handling of the catch clause below — confirm against the IOE class.
                    throw new IOE(exceptionMessage.toString());
                }
            }
        } catch (IOException | ChannelException e) {
            // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
            // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
            // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
            // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
            // we will wait for a short period then retry the request indefinitely, expecting the task's location to
            // eventually be updated.
            final Duration delay;
            if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
                String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
                if (headerId != null && !headerId.equals(taskId)) {
                    // Mismatching task id: use the fixed mismatch delay; the retry policy is not consulted here.
                    log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
                    delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
                } else {
                    delay = retryPolicy.getAndIncrementRetryDelay();
                }
            } else {
                delay = retryPolicy.getAndIncrementRetryDelay();
            }
            final String urlForLog = request.getUrl().toString();
            if (!retry) {
                // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
                // for informational purposes only); log at INFO instead of WARN.
                log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else if (delay == null) {
                // When retrying, log the final failure at WARN level, since it is likely to be bad news.
                log.warn(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else {
                try {
                    final long sleepTime = delay.getMillis();
                    // When retrying, log non-final failures at INFO level.
                    log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e2) {
                    // Restore the interrupt flag, attach the interruption to the original failure and stop retrying.
                    Thread.currentThread().interrupt();
                    e.addSuppressed(e2);
                    throw new RuntimeException(e);
                }
            }
        } catch (NoTaskLocationException e) {
            log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
            throw e;
        } catch (Exception e) {
            // Anything not handled by the clauses above is logged and rethrown without retrying.
            log.warn(e, "Exception while sending request");
            throw e;
        }
    }
}
Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Request(org.apache.druid.java.util.http.client.Request) Duration(org.joda.time.Duration) IOException(java.io.IOException) TaskStatus(org.apache.druid.indexer.TaskStatus) IAE(org.apache.druid.java.util.common.IAE) TaskLocation(org.apache.druid.indexer.TaskLocation) MalformedURLException(java.net.MalformedURLException) ChannelException(org.jboss.netty.channel.ChannelException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) IOE(org.apache.druid.java.util.common.IOE) ChannelException(org.jboss.netty.channel.ChannelException)

Example 54 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class HttpRemoteTaskRunnerTest method createTaskRunnerForTestTaskAddedOrUpdated.

/**
 * Builds and starts an {@link HttpRemoteTaskRunner} backed by mocks and a {@code TestDruidNodeDiscovery}.
 *
 * @param taskStorage                       task storage handed to the runner
 * @param listenerNotificationsAccumulator  when non-null, a listener is registered that appends a
 *                                          {@code (taskId, newLocation)} or {@code (taskId, status)} pair to this
 *                                          list for every location or status change; when null no listener is
 *                                          registered
 * @return the started task runner
 */
private HttpRemoteTaskRunner createTaskRunnerForTestTaskAddedOrUpdated(TaskStorage taskStorage, List<Object> listenerNotificationsAccumulator) {
    final TestDruidNodeDiscovery nodeDiscovery = new TestDruidNodeDiscovery();
    final DruidNodeDiscoveryProvider discoveryProvider = EasyMock.createMock(DruidNodeDiscoveryProvider.class);
    EasyMock.expect(discoveryProvider.getForService(WorkerNodeService.DISCOVERY_SERVICE_KEY)).andReturn(nodeDiscovery);
    EasyMock.replay(discoveryProvider);

    // Runner configuration with the pending-task thread count overridden to 3.
    final HttpRemoteTaskRunnerConfig runnerConfig = new HttpRemoteTaskRunnerConfig() {

        @Override
        public int getPendingTasksRunnerNumThreads() {
            return 3;
        }
    };

    final HttpRemoteTaskRunner runner = new HttpRemoteTaskRunner(TestHelper.makeJsonMapper(), runnerConfig, EasyMock.createNiceMock(HttpClient.class), DSuppliers.of(new AtomicReference<>(DefaultWorkerBehaviorConfig.defaultConfig())), new NoopProvisioningStrategy<>(), discoveryProvider, taskStorage, EasyMock.createNiceMock(CuratorFramework.class), new IndexerZkConfig(new ZkPathsConfig(), null, null, null, null));
    runner.start();

    if (listenerNotificationsAccumulator == null) {
        return runner;
    }

    // Records every notification as a two-element list so the caller can inspect them afterwards.
    final TaskRunnerListener recordingListener = new TaskRunnerListener() {

        @Override
        public String getListenerId() {
            return "test-listener";
        }

        @Override
        public void locationChanged(String taskId, TaskLocation newLocation) {
            listenerNotificationsAccumulator.add(ImmutableList.of(taskId, newLocation));
        }

        @Override
        public void statusChanged(String taskId, TaskStatus status) {
            listenerNotificationsAccumulator.add(ImmutableList.of(taskId, status));
        }
    };
    runner.registerListener(recordingListener, Execs.directExecutor());
    return runner;
}
Also used : IndexerZkConfig(org.apache.druid.server.initialization.IndexerZkConfig) TaskRunnerListener(org.apache.druid.indexing.overlord.TaskRunnerListener) AtomicReference(java.util.concurrent.atomic.AtomicReference) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) HttpRemoteTaskRunnerConfig(org.apache.druid.indexing.overlord.config.HttpRemoteTaskRunnerConfig) CuratorFramework(org.apache.curator.framework.CuratorFramework) DruidNodeDiscoveryProvider(org.apache.druid.discovery.DruidNodeDiscoveryProvider) ZkPathsConfig(org.apache.druid.server.initialization.ZkPathsConfig) HttpClient(org.apache.druid.java.util.http.client.HttpClient)

Example 55 with TaskLocation

use of org.apache.druid.indexer.TaskLocation in project druid by apache.

the class TaskRunnerUtilsTest method testMakeTaskLocationURL.

/**
 * Checks the URL string that {@code TaskRunnerUtils.makeTaskLocationURL} produces for a location built from
 * {@code ("1.2.3.4", 8090, 8290)} and the argument {@code "foo bar&"}: the expected result uses https on port
 * 8290 and has the argument percent-encoded in the path.
 */
@Test
public void testMakeTaskLocationURL() {
    final TaskLocation location = new TaskLocation("1.2.3.4", 8090, 8290);
    final String expectedUrl = "https://1.2.3.4:8290/druid/worker/v1/task/foo%20bar%26/log";

    final URL actualUrl = TaskRunnerUtils.makeTaskLocationURL(location, "/druid/worker/v1/task/%s/log", "foo bar&");

    Assert.assertEquals(expectedUrl, actualUrl.toString());
}
Also used : URL(java.net.URL) TaskLocation(org.apache.druid.indexer.TaskLocation) Test(org.junit.Test)

Aggregations

TaskLocation (org.apache.druid.indexer.TaskLocation) 66, Test (org.junit.Test) 50, Task (org.apache.druid.indexing.common.task.Task) 46, TaskRunnerListener (org.apache.druid.indexing.overlord.TaskRunnerListener) 44, ArrayList (java.util.ArrayList) 42, Collection (java.util.Collection) 40, Executor (java.util.concurrent.Executor) 40, RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask) 40, ImmutableMap (com.google.common.collect.ImmutableMap) 38, Map (java.util.Map) 38, HashMap (java.util.HashMap) 36, TreeMap (java.util.TreeMap) 36, TaskStatus (org.apache.druid.indexer.TaskStatus) 20, KafkaDataSourceMetadata (org.apache.druid.indexing.kafka.KafkaDataSourceMetadata) 20, KafkaIndexTask (org.apache.druid.indexing.kafka.KafkaIndexTask) 20, KinesisDataSourceMetadata (org.apache.druid.indexing.kinesis.KinesisDataSourceMetadata) 20, KinesisIndexTask (org.apache.druid.indexing.kinesis.KinesisIndexTask) 20, DateTime (org.joda.time.DateTime) 20, ConcurrentHashMap (java.util.concurrent.ConcurrentHashMap) 18, TaskReportData (org.apache.druid.indexing.seekablestream.supervisor.TaskReportData) 12