
Example 6 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class ParallelIndexSupervisorTaskResourceTest, method buildStateMap:

private Map<String, SubTaskSpecStatus> buildStateMap() {
    final Map<String, SubTaskSpecStatus> stateMap = new HashMap<>();
    subTaskSpecs.forEach((specId, spec) -> {
        final List<TaskStatusPlus> taskHistory = taskHistories.get(specId);
        final TaskStatusPlus runningTaskStatus = runningSpecs.get(specId);
        stateMap.put(specId, new SubTaskSpecStatus(spec, runningTaskStatus, taskHistory == null ? Collections.emptyList() : taskHistory));
    });
    return stateMap;
}
Also used: ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) SubTaskSpecStatus(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus)
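
For context, here is a hedged sketch of how a caller might consume such a state map. It uses only accessors that appear elsewhere in these examples (getCurrentStatus(), getTaskHistory(), getStatusCode()); the class and method names are illustrative, not part of Druid:

import java.util.List;
import java.util.Map;
import org.apache.druid.indexer.TaskStatusPlus;
import org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus;

class SubTaskSpecStatusSummary {
    // Illustrative helper (not Druid API): walks a specId -> status map like
    // the one built above and prints each spec's state and attempt count.
    static void summarize(Map<String, SubTaskSpecStatus> stateMap) {
        stateMap.forEach((specId, status) -> {
            final TaskStatusPlus current = status.getCurrentStatus();
            final List<TaskStatusPlus> history = status.getTaskHistory();
            System.out.printf(
                "spec[%s]: state=%s, attempts=%d%n",
                specId,
                // A null current status means the spec has no running task,
                // matching the nullable runningTaskStatus in buildStateMap().
                current == null ? "NOT_RUNNING" : current.getStatusCode(),
                history.size());
        });
    }
}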

Example 7 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class OverlordResourceTest, method testGetTasksRequiresDatasourceRead:

@Test
public void testGetTasksRequiresDatasourceRead() {
    // Setup mocks for a user who has read access to "wikipedia"
    // and no access to "buzzfeed"
    expectAuthorizationTokenCheck(Users.WIKI_READER);
    // Setup mocks to return completed, active, known, pending and running tasks
    EasyMock.expect(taskStorageQueryAdapter.getCompletedTaskInfoByCreatedTimeDuration(null, null, null)).andStubReturn(ImmutableList.of(createTaskInfo("id_5", Datasources.WIKIPEDIA), createTaskInfo("id_6", Datasources.BUZZFEED)));
    EasyMock.expect(taskStorageQueryAdapter.getActiveTaskInfo(null)).andStubReturn(ImmutableList.of(createTaskInfo("id_1", Datasources.WIKIPEDIA), createTaskInfo("id_2", Datasources.BUZZFEED)));
    EasyMock.<Collection<? extends TaskRunnerWorkItem>>expect(taskRunner.getKnownTasks()).andReturn(ImmutableList.of(new MockTaskRunnerWorkItem("id_1", null), new MockTaskRunnerWorkItem("id_4", null))).atLeastOnce();
    EasyMock.<Collection<? extends TaskRunnerWorkItem>>expect(taskRunner.getPendingTasks()).andReturn(ImmutableList.of(new MockTaskRunnerWorkItem("id_4", null)));
    EasyMock.<Collection<? extends TaskRunnerWorkItem>>expect(taskRunner.getRunningTasks()).andReturn(ImmutableList.of(new MockTaskRunnerWorkItem("id_1", null)));
    // Replay all mocks
    EasyMock.replay(taskRunner, taskMaster, taskStorageQueryAdapter, indexerMetadataStorageAdapter, req, workerTaskRunnerQueryAdapter);
    // Verify that only tasks belonging to readable datasources are returned
    List<TaskStatusPlus> responseObjects = (List<TaskStatusPlus>) overlordResource.getTasks(null, null, null, null, null, req).getEntity();
    Assert.assertEquals(2, responseObjects.size());
    for (TaskStatusPlus taskStatus : responseObjects) {
        Assert.assertEquals(Datasources.WIKIPEDIA, taskStatus.getDataSource());
    }
}
Also used: TaskRunnerWorkItem(org.apache.druid.indexing.overlord.TaskRunnerWorkItem) Collection(java.util.Collection) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) Test(org.junit.Test)
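
The authorization behavior this test exercises boils down to filtering task statuses by datasource. A minimal sketch of that shape, assuming a caller-supplied set of readable datasources (the helper and its Set parameter are illustrative, not the Overlord's actual filtering code):

import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;
import org.apache.druid.indexer.TaskStatusPlus;

class TaskStatusFilter {
    // Keep only the statuses whose datasource the caller is allowed to read.
    static List<TaskStatusPlus> filterReadable(List<TaskStatusPlus> statuses, Set<String> readableDatasources) {
        return statuses.stream()
                       .filter(status -> readableDatasources.contains(status.getDataSource()))
                       .collect(Collectors.toList());
    }
}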

Example 8 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class AbstractS3AssumeRoleIndexTest, method doTestS3WithAssumeRoleAndInvalidExternalIdShouldFail:

void doTestS3WithAssumeRoleAndInvalidExternalIdShouldFail() throws Exception {
    if (config.getS3AssumeRoleExternalId() == null || config.getS3AssumeRoleWithExternalId() == null) {
        throw new SkipException("S3 Assume Role and external Id must be set for this test");
    }
    final String indexDatasource = "wikipedia_index_test_" + UUID.randomUUID();
    try {
        final Function<String, String> s3PropsTransform = spec -> {
            try {
                String inputSourceValue = jsonMapper.writeValueAsString(INPUT_SOURCE_OBJECTS_VALUE);
                inputSourceValue = StringUtils.replace(inputSourceValue, "%%BUCKET%%", config.getCloudBucket());
                inputSourceValue = StringUtils.replace(inputSourceValue, "%%PATH%%", config.getCloudPath());
                ImmutableMap.Builder<String, Object> s3ConfigMap = ImmutableMap.builder();
                if (isSetS3OverrideCredentials()) {
                    s3ConfigMap.put("accessKeyId", ImmutableMap.of("type", "environment", "variable", "OVERRIDE_S3_ACCESS_KEY"));
                    s3ConfigMap.put("secretAccessKey", ImmutableMap.of("type", "environment", "variable", "OVERRIDE_S3_SECRET_KEY"));
                }
                s3ConfigMap.put("assumeRoleArn", config.getS3AssumeRoleWithExternalId());
                s3ConfigMap.put("assumeRoleExternalId", "RANDOM_INVALID_VALUE_" + UUID.randomUUID());
                spec = StringUtils.replace(spec, "%%INPUT_SOURCE_CONFIG%%", jsonMapper.writeValueAsString(s3ConfigMap.build()));
                spec = StringUtils.replace(spec, "%%INPUT_SOURCE_TYPE%%", "s3");
                spec = StringUtils.replace(spec, "%%INPUT_SOURCE_PROPERTY_KEY%%", INPUT_SOURCE_OBJECTS_KEY);
                return StringUtils.replace(spec, "%%INPUT_SOURCE_PROPERTY_VALUE%%", inputSourceValue);
            } catch (Exception e) {
                throw new RuntimeException(e);
            }
        };
        final String fullDatasourceName = indexDatasource + config.getExtraDatasourceNameSuffix();
        final String taskSpec = s3PropsTransform.apply(StringUtils.replace(getResourceAsString(INDEX_TASK_WITH_OVERRIDE), "%%DATASOURCE%%", fullDatasourceName));
        final String taskID = indexer.submitTask(taskSpec);
        indexer.waitUntilTaskFails(taskID);
        TaskStatusPlus taskStatusPlus = indexer.getTaskStatus(taskID);
        // Index task is expected to fail as the external id is invalid
        Assert.assertEquals(taskStatusPlus.getStatusCode(), TaskState.FAILED);
        Assert.assertNotNull(taskStatusPlus.getErrorMsg());
        Assert.assertTrue(taskStatusPlus.getErrorMsg().contains("com.amazonaws.services.securitytoken.model.AWSSecurityTokenServiceException"), "Expect task to fail with AWSSecurityTokenServiceException");
    } finally {
        // If the test passes, there is no datasource to unload
        closeQuietly(unloader(indexDatasource + config.getExtraDatasourceNameSuffix()));
    }
}
Also used: SkipException(org.testng.SkipException) TaskState(org.apache.druid.indexer.TaskState) ImmutableList(com.google.common.collect.ImmutableList) Assert(org.testng.Assert) ImmutableMap(com.google.common.collect.ImmutableMap) Closeable(java.io.Closeable) StringUtils(org.apache.druid.java.util.common.StringUtils) UUID(java.util.UUID) Function(java.util.function.Function) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) Pair(org.apache.druid.java.util.common.Pair)
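
The final assertions follow a reusable pattern: the task must end FAILED, and its error message must name the expected root cause. A hedged TestNG-style helper capturing that pattern (the helper itself is illustrative, not Druid test infrastructure):

import org.apache.druid.indexer.TaskState;
import org.apache.druid.indexer.TaskStatusPlus;
import org.testng.Assert;

class TaskFailureAssertions {
    // Assert that a finished task failed and that its error message mentions
    // the expected cause (e.g. an AWS STS exception class name).
    static void assertFailedWith(TaskStatusPlus status, String expectedCauseFragment) {
        Assert.assertEquals(status.getStatusCode(), TaskState.FAILED);
        Assert.assertNotNull(status.getErrorMsg());
        Assert.assertTrue(
            status.getErrorMsg().contains(expectedCauseFragment),
            "Expected task to fail with " + expectedCauseFragment);
    }
}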

Example 9 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class ParallelIndexPhaseRunner, method run:

@Override
public TaskState run() throws Exception {
    final CountingSubTaskSpecIterator subTaskSpecIterator = new CountingSubTaskSpecIterator(subTaskSpecIterator());
    if (!subTaskSpecIterator.hasNext()) {
        LOG.warn("There's no input split to process");
        return TaskState.SUCCESS;
    }
    final long taskStatusCheckingPeriod = tuningConfig.getTaskStatusCheckPeriodMs();
    taskMonitor = new TaskMonitor<>(toolbox.getIndexingServiceClient(), tuningConfig.getMaxRetry(), estimateTotalNumSubTasks());
    TaskState state = TaskState.RUNNING;
    taskMonitor.start(taskStatusCheckingPeriod);
    try {
        LOG.info("Submitting initial tasks");
        // Submit initial tasks
        while (isRunning() && subTaskSpecIterator.hasNext() && taskMonitor.getNumRunningTasks() < maxNumConcurrentSubTasks) {
            submitNewTask(taskMonitor, subTaskSpecIterator.next());
        }
        LOG.info("Waiting for subTasks to be completed");
        while (isRunning()) {
            final SubTaskCompleteEvent<SubTaskType> taskCompleteEvent = taskCompleteEvents.poll(taskStatusCheckingPeriod, TimeUnit.MILLISECONDS);
            if (taskCompleteEvent != null) {
                final TaskState completeState = taskCompleteEvent.getLastState();
                getSubtaskCompletionCallback(taskCompleteEvent).run();
                switch(completeState) {
                    case SUCCESS:
                        final TaskStatusPlus completeStatus = taskCompleteEvent.getLastStatus();
                        if (completeStatus == null) {
                            throw new ISE("Last status of complete task is missing!");
                        }
                        if (!subTaskSpecIterator.hasNext()) {
                            // We have no more subTasks to run
                            if (taskMonitor.getNumRunningTasks() == 0 && taskCompleteEvents.isEmpty()) {
                                subTaskScheduleAndMonitorStopped = true;
                                if (subTaskSpecIterator.count == taskMonitor.getNumSucceededTasks()) {
                                    // Succeeded
                                    state = TaskState.SUCCESS;
                                } else {
                                    // Failed
                                    final ParallelIndexingPhaseProgress monitorStatus = taskMonitor.getProgress();
                                    throw new ISE("Expected [%d] tasks to succeed, but we got [%d] succeeded tasks and [%d] failed tasks", subTaskSpecIterator.count, monitorStatus.getSucceeded(), monitorStatus.getFailed());
                                }
                            }
                        } else if (taskMonitor.getNumRunningTasks() < maxNumConcurrentSubTasks) {
                            // We have more subTasks to run
                            submitNewTask(taskMonitor, subTaskSpecIterator.next());
                        } else {
                            // We have more subTasks to run, but don't have enough available task slots;
                            // do nothing.
                        }
                        break;
                    case FAILED:
                        // TaskMonitor already tried everything it can do for failed tasks. We failed.
                        state = TaskState.FAILED;
                        subTaskScheduleAndMonitorStopped = true;
                        final TaskStatusPlus lastStatus = taskCompleteEvent.getLastStatus();
                        if (lastStatus != null) {
                            LOG.error("Failed because of the failed sub task[%s]", lastStatus.getId());
                        } else {
                            final SubTaskSpec<?> spec = taskCompleteEvent.getSpec();
                            LOG.error("Failed to process spec[%s] with an unknown last status", spec.getId());
                        }
                        break;
                    default:
                        throw new ISE("spec[%s] is in an invalid state[%s]", taskCompleteEvent.getSpec().getId(), completeState);
                }
            }
        }
    } finally {
        stopInternal();
        if (!state.isComplete()) {
            state = TaskState.FAILED;
        }
    }
    return state;
}
Also used: TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) ISE(org.apache.druid.java.util.common.ISE) TaskState(org.apache.druid.indexer.TaskState)
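
Stripped of Druid specifics, the run() method above is a bounded-parallelism scheduler: submit work until a concurrency cap is reached, then block on a completion queue and backfill one slot per completion event. A generic, self-contained sketch of that pattern, under the assumption that one completion event arrives per submitted item (all names here are illustrative):

import java.util.Iterator;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;

class BoundedSubmitLoop {
    // Generic shape of the loop above: fill up to maxConcurrent slots, then
    // replace each completed item with the next pending one until the
    // iterator is exhausted and all in-flight work has completed.
    static <T> void run(Iterator<T> pending, int maxConcurrent, BlockingQueue<T> completions, Consumer<T> submit) throws InterruptedException {
        int running = 0;
        while (running < maxConcurrent && pending.hasNext()) {
            submit.accept(pending.next());
            running++;
        }
        while (running > 0) {
            // Poll with a timeout, as the Druid runner does, so a real caller
            // could also check external stop conditions between events.
            final T done = completions.poll(1, TimeUnit.SECONDS);
            if (done == null) {
                continue;
            }
            running--;
            if (pending.hasNext()) {
                submit.accept(pending.next());
                running++;
            }
        }
    }
}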

Example 10 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class ParallelIndexSupervisorTaskResourceTest, method checkState:

/**
 * Test all endpoints of {@link ParallelIndexSupervisorTask}.
 */
private void checkState(
        int expectedSucceededTasks,
        int expectedFailedTask,
        // subTaskSpecId -> response
        Map<String, SubTaskSpecStatus> expectedSubTaskStateResponses) {
    Response response = task.getProgress(newRequest());
    Assert.assertEquals(200, response.getStatus());
    final ParallelIndexingPhaseProgress monitorStatus = (ParallelIndexingPhaseProgress) response.getEntity();
    // numRunningTasks
    Assert.assertEquals(runningTasks.size(), monitorStatus.getRunning());
    // numSucceededTasks
    Assert.assertEquals(expectedSucceededTasks, monitorStatus.getSucceeded());
    // numFailedTasks
    Assert.assertEquals(expectedFailedTask, monitorStatus.getFailed());
    // numCompleteTasks
    Assert.assertEquals(expectedSucceededTasks + expectedFailedTask, monitorStatus.getComplete());
    // numTotalTasks
    Assert.assertEquals(runningTasks.size() + expectedSucceededTasks + expectedFailedTask, monitorStatus.getTotal());
    // runningSubTasks
    response = task.getRunningTasks(newRequest());
    Assert.assertEquals(200, response.getStatus());
    Assert.assertEquals(runningTasks.stream().map(AbstractTask::getId).collect(Collectors.toSet()), new HashSet<>((Collection<String>) response.getEntity()));
    // subTaskSpecs
    response = task.getSubTaskSpecs(newRequest());
    Assert.assertEquals(200, response.getStatus());
    List<SubTaskSpec<SinglePhaseSubTask>> actualSubTaskSpecMap = (List<SubTaskSpec<SinglePhaseSubTask>>) response.getEntity();
    Assert.assertEquals(subTaskSpecs.keySet(), actualSubTaskSpecMap.stream().map(SubTaskSpec::getId).collect(Collectors.toSet()));
    // runningSubTaskSpecs
    response = task.getRunningSubTaskSpecs(newRequest());
    Assert.assertEquals(200, response.getStatus());
    actualSubTaskSpecMap = (List<SubTaskSpec<SinglePhaseSubTask>>) response.getEntity();
    Assert.assertEquals(runningSpecs.keySet(), actualSubTaskSpecMap.stream().map(SubTaskSpec::getId).collect(Collectors.toSet()));
    // completeSubTaskSpecs
    final List<SubTaskSpec<SinglePhaseSubTask>> completeSubTaskSpecs = expectedSubTaskStateResponses.entrySet().stream().filter(entry -> !runningSpecs.containsKey(entry.getKey())).map(entry -> entry.getValue().getSpec()).collect(Collectors.toList());
    response = task.getCompleteSubTaskSpecs(newRequest());
    Assert.assertEquals(200, response.getStatus());
    Assert.assertEquals(completeSubTaskSpecs, response.getEntity());
    // subTaskSpec
    final String subTaskId = runningSpecs.keySet().iterator().next();
    response = task.getSubTaskSpec(subTaskId, newRequest());
    Assert.assertEquals(200, response.getStatus());
    final SubTaskSpec<SinglePhaseSubTask> subTaskSpec = (SubTaskSpec<SinglePhaseSubTask>) response.getEntity();
    Assert.assertEquals(subTaskId, subTaskSpec.getId());
    // subTaskState
    response = task.getSubTaskState(subTaskId, newRequest());
    Assert.assertEquals(200, response.getStatus());
    final SubTaskSpecStatus expectedResponse = Preconditions.checkNotNull(expectedSubTaskStateResponses.get(subTaskId), "response for task[%s]", subTaskId);
    final SubTaskSpecStatus actualResponse = (SubTaskSpecStatus) response.getEntity();
    Assert.assertEquals(expectedResponse.getSpec().getId(), actualResponse.getSpec().getId());
    Assert.assertEquals(expectedResponse.getCurrentStatus(), actualResponse.getCurrentStatus());
    Assert.assertEquals(expectedResponse.getTaskHistory(), actualResponse.getTaskHistory());
    // completeSubTaskSpecAttemptHistory
    final String completeSubTaskSpecId = expectedSubTaskStateResponses.entrySet().stream().filter(entry -> {
        final TaskStatusPlus currentStatus = entry.getValue().getCurrentStatus();
        return currentStatus != null && (currentStatus.getStatusCode() == TaskState.SUCCESS || currentStatus.getStatusCode() == TaskState.FAILED);
    }).map(Entry::getKey).findFirst().orElse(null);
    if (completeSubTaskSpecId != null) {
        response = task.getCompleteSubTaskSpecAttemptHistory(completeSubTaskSpecId, newRequest());
        Assert.assertEquals(200, response.getStatus());
        Assert.assertEquals(expectedSubTaskStateResponses.get(completeSubTaskSpecId).getTaskHistory(), response.getEntity());
    }
}
Also used: TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) AbstractTask(org.apache.druid.indexing.common.task.AbstractTask) SegmentAllocators(org.apache.druid.indexing.common.task.SegmentAllocators) SubTaskSpecStatus(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTaskRunner.SubTaskSpecStatus) AbstractInputSource(org.apache.druid.data.input.AbstractInputSource) TaskResource(org.apache.druid.indexing.common.task.TaskResource) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) After(org.junit.After) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) AuthConfig(org.apache.druid.server.security.AuthConfig) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) ImmutableMap(com.google.common.collect.ImmutableMap) InputFormat(org.apache.druid.data.input.InputFormat) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) Collection(java.util.Collection) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) SplittableInputSource(org.apache.druid.data.input.impl.SplittableInputSource) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) SegmentAllocator(org.apache.druid.segment.realtime.appenderator.SegmentAllocator) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) TaskState(org.apache.druid.indexer.TaskState) List(java.util.List) Stream(java.util.stream.Stream) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) Response(javax.ws.rs.core.Response) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) IntStream(java.util.stream.IntStream) Intervals(org.apache.druid.java.util.common.Intervals) InputSplit(org.apache.druid.data.input.InputSplit) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) HashMap(java.util.HashMap) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) AuthenticationResult(org.apache.druid.server.security.AuthenticationResult) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) HttpServletRequest(javax.servlet.http.HttpServletRequest) Nullable(javax.annotation.Nullable) TaskLocation(org.apache.druid.indexer.TaskLocation) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) Test(org.junit.Test) EasyMock(org.easymock.EasyMock) Granularities(org.apache.druid.java.util.common.granularity.Granularities) RunnerTaskState(org.apache.druid.indexer.RunnerTaskState) TimeUnit(java.util.concurrent.TimeUnit) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) DataSchema(org.apache.druid.segment.indexing.DataSchema) Collections(java.util.Collections)
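
Every endpoint check in this test repeats the same two steps: assert HTTP 200, then cast the entity. A small JUnit-style helper capturing that repetition (illustrative, not part of the Druid test suite; for parameterized entity types the element-level cast remains unchecked):

import javax.ws.rs.core.Response;
import org.junit.Assert;

class ResponseAssertions {
    // Assert a 200 response and return its entity cast to the expected type.
    static <T> T assertOkAndGetEntity(Response response, Class<T> entityClass) {
        Assert.assertEquals(200, response.getStatus());
        return entityClass.cast(response.getEntity());
    }
}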

Aggregations

TaskStatusPlus (org.apache.druid.indexer.TaskStatusPlus): 21
ImmutableList (com.google.common.collect.ImmutableList): 10
List (java.util.List): 9
Test (org.junit.Test): 9
ArrayList (java.util.ArrayList): 8
Response (javax.ws.rs.core.Response): 7
TaskState (org.apache.druid.indexer.TaskState): 7
ImmutableMap (com.google.common.collect.ImmutableMap): 6
TaskStatus (org.apache.druid.indexer.TaskStatus): 6
HashMap (java.util.HashMap): 5
TaskInfo (org.apache.druid.indexer.TaskInfo): 5
Collection (java.util.Collection): 4
Map (java.util.Map): 4
Collectors (java.util.stream.Collectors): 4
Nullable (javax.annotation.Nullable): 4
Task (org.apache.druid.indexing.common.task.Task): 4
StringUtils (org.apache.druid.java.util.common.StringUtils): 4
Function (com.google.common.base.Function): 3
ResourceFilters (com.sun.jersey.spi.container.ResourceFilters): 3
Collections (java.util.Collections): 3