Example 1 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class CompactSegments, method run.

@Override
public DruidCoordinatorRuntimeParams run(DruidCoordinatorRuntimeParams params) {
    LOG.info("Compact segments");
    final CoordinatorCompactionConfig dynamicConfig = params.getCoordinatorCompactionConfig();
    final CoordinatorStats stats = new CoordinatorStats();
    List<DataSourceCompactionConfig> compactionConfigList = dynamicConfig.getCompactionConfigs();
    if (dynamicConfig.getMaxCompactionTaskSlots() > 0) {
        Map<String, VersionedIntervalTimeline<String, DataSegment>> dataSources = params.getUsedSegmentsTimelinesPerDataSource();
        if (compactionConfigList != null && !compactionConfigList.isEmpty()) {
            Map<String, DataSourceCompactionConfig> compactionConfigs = compactionConfigList.stream().collect(Collectors.toMap(DataSourceCompactionConfig::getDataSource, Function.identity()));
            final List<TaskStatusPlus> compactionTasks = filterNonCompactionTasks(indexingServiceClient.getActiveTasks());
            // dataSource -> list of intervals for which compaction will be skipped in this run
            final Map<String, List<Interval>> intervalsToSkipCompaction = new HashMap<>();
            int numEstimatedNonCompleteCompactionTasks = 0;
            for (TaskStatusPlus status : compactionTasks) {
                final TaskPayloadResponse response = indexingServiceClient.getTaskPayload(status.getId());
                if (response == null) {
                    throw new ISE("Got a null paylord from overlord for task[%s]", status.getId());
                }
                if (COMPACTION_TASK_TYPE.equals(response.getPayload().getType())) {
                    final ClientCompactionTaskQuery compactionTaskQuery = (ClientCompactionTaskQuery) response.getPayload();
                    DataSourceCompactionConfig dataSourceCompactionConfig = compactionConfigs.get(status.getDataSource());
                    if (dataSourceCompactionConfig != null && dataSourceCompactionConfig.getGranularitySpec() != null) {
                        Granularity configuredSegmentGranularity = dataSourceCompactionConfig.getGranularitySpec().getSegmentGranularity();
                        if (configuredSegmentGranularity != null && compactionTaskQuery.getGranularitySpec() != null && !configuredSegmentGranularity.equals(compactionTaskQuery.getGranularitySpec().getSegmentGranularity())) {
                            // Cancel the active compaction task if its segmentGranularity no longer matches the
                            // config, since the interval will need to be re-compacted anyway
                            LOG.info("Canceled task[%s] as task segmentGranularity is [%s] but compaction config " + "segmentGranularity is [%s]", status.getId(), compactionTaskQuery.getGranularitySpec().getSegmentGranularity(), configuredSegmentGranularity);
                            indexingServiceClient.cancelTask(status.getId());
                            continue;
                        }
                    }
                    // Skip interval as the current active compaction task is good
                    final Interval interval = compactionTaskQuery.getIoConfig().getInputSpec().getInterval();
                    intervalsToSkipCompaction.computeIfAbsent(status.getDataSource(), k -> new ArrayList<>()).add(interval);
                    // Since we keep the current active compaction task running, we count the task slots it occupies
                    numEstimatedNonCompleteCompactionTasks += findMaxNumTaskSlotsUsedByOneCompactionTask(compactionTaskQuery.getTuningConfig());
                } else {
                    throw new ISE("task[%s] is not a compactionTask", status.getId());
                }
            }
            // Skip all the intervals locked by higher priority tasks for each datasource
            // This must be done after the invalid compaction tasks are cancelled
            // in the loop above so that their intervals are not considered locked
            getLockedIntervalsToSkip(compactionConfigList).forEach((dataSource, intervals) -> intervalsToSkipCompaction.computeIfAbsent(dataSource, ds -> new ArrayList<>()).addAll(intervals));
            final CompactionSegmentIterator iterator = policy.reset(compactionConfigs, dataSources, intervalsToSkipCompaction);
            int totalCapacity;
            if (dynamicConfig.isUseAutoScaleSlots()) {
                try {
                    totalCapacity = indexingServiceClient.getTotalWorkerCapacityWithAutoScale();
                } catch (Exception e) {
                    LOG.warn("Failed to get total worker capacity with auto scale slots. Falling back to current capacity count");
                    totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
                }
            } else {
                totalCapacity = indexingServiceClient.getTotalWorkerCapacity();
            }
            final int compactionTaskCapacity = (int) Math.min(totalCapacity * dynamicConfig.getCompactionTaskSlotRatio(), dynamicConfig.getMaxCompactionTaskSlots());
            final int numAvailableCompactionTaskSlots;
            if (numEstimatedNonCompleteCompactionTasks > 0) {
                numAvailableCompactionTaskSlots = Math.max(0, compactionTaskCapacity - numEstimatedNonCompleteCompactionTasks);
            } else {
                // compactionTaskCapacity might be 0 if totalWorkerCapacity is low.
                // This guarantees that at least one slot is available if
                // compaction is enabled and numEstimatedNonCompleteCompactionTasks is 0.
                numAvailableCompactionTaskSlots = Math.max(1, compactionTaskCapacity);
            }
            LOG.info("Found [%d] available task slots for compaction out of [%d] max compaction task capacity", numAvailableCompactionTaskSlots, compactionTaskCapacity);
            stats.addToGlobalStat(AVAILABLE_COMPACTION_TASK_SLOT, numAvailableCompactionTaskSlots);
            stats.addToGlobalStat(MAX_COMPACTION_TASK_SLOT, compactionTaskCapacity);
            final Map<String, AutoCompactionSnapshot.Builder> currentRunAutoCompactionSnapshotBuilders = new HashMap<>();
            if (numAvailableCompactionTaskSlots > 0) {
                stats.accumulate(doRun(compactionConfigs, currentRunAutoCompactionSnapshotBuilders, numAvailableCompactionTaskSlots, iterator));
            } else {
                stats.accumulate(makeStats(currentRunAutoCompactionSnapshotBuilders, 0, iterator));
            }
        } else {
            LOG.info("compactionConfig is empty. Skip.");
            autoCompactionSnapshotPerDataSource.set(new HashMap<>());
        }
    } else {
        LOG.info("maxCompactionTaskSlots was set to 0. Skip compaction");
        autoCompactionSnapshotPerDataSource.set(new HashMap<>());
    }
    return params.buildFromExisting().withCoordinatorStats(stats).build();
}
Also used : Logger(org.apache.druid.java.util.common.logger.Logger) Granularity(org.apache.druid.java.util.common.granularity.Granularity) Inject(com.google.inject.Inject) ClientCompactionTaskDimensionsSpec(org.apache.druid.client.indexing.ClientCompactionTaskDimensionsSpec) DruidCoordinatorRuntimeParams(org.apache.druid.server.coordinator.DruidCoordinatorRuntimeParams) IndexingServiceClient(org.apache.druid.client.indexing.IndexingServiceClient) HashMap(java.util.HashMap) CoordinatorStats(org.apache.druid.server.coordinator.CoordinatorStats) AtomicReference(java.util.concurrent.atomic.AtomicReference) Function(java.util.function.Function) AutoCompactionSnapshot(org.apache.druid.server.coordinator.AutoCompactionSnapshot) DataSourceCompactionConfig(org.apache.druid.server.coordinator.DataSourceCompactionConfig) ArrayList(java.util.ArrayList) TaskPayloadResponse(org.apache.druid.client.indexing.TaskPayloadResponse) Interval(org.joda.time.Interval) DruidCoordinatorConfig(org.apache.druid.server.coordinator.DruidCoordinatorConfig) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) Nullable(javax.annotation.Nullable) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) ClientCompactionTaskQueryTuningConfig(org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig) CompactionStatistics(org.apache.druid.server.coordinator.CompactionStatistics) CoordinatorCompactionConfig(org.apache.druid.server.coordinator.CoordinatorCompactionConfig) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) List(java.util.List) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) GranularityType(org.apache.druid.java.util.common.granularity.GranularityType) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) DataSegment(org.apache.druid.timeline.DataSegment) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
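
The slot accounting in this method is worth restating: the compaction task capacity is the smaller of a ratio-based share of total worker capacity and an absolute cap, running compaction tasks subtract from it, and one slot is always guaranteed when nothing is in flight. A minimal, self-contained sketch of the same arithmetic follows; the class, method, and variable names here are illustrative, not part of the Druid API.

public class CompactionSlotMathSketch {

    // Mirrors the capacity calculation in CompactSegments.run(); sketch only.
    static int availableCompactionSlots(int totalWorkerCapacity,
                                        double compactionTaskSlotRatio,
                                        int maxCompactionTaskSlots,
                                        int numNonCompleteCompactionTasks) {
        // Capacity is capped both by the ratio of total capacity and by the absolute maximum.
        final int compactionTaskCapacity = (int) Math.min(
            totalWorkerCapacity * compactionTaskSlotRatio,
            maxCompactionTaskSlots
        );
        if (numNonCompleteCompactionTasks > 0) {
            // Active compaction tasks consume slots; never report a negative count.
            return Math.max(0, compactionTaskCapacity - numNonCompleteCompactionTasks);
        }
        // With no compaction tasks running, guarantee at least one slot even if the
        // ratio-based capacity rounds down to 0 on a small cluster.
        return Math.max(1, compactionTaskCapacity);
    }

    public static void main(String[] args) {
        // 10 workers at a 0.1 ratio gives capacity 1; nothing running, so 1 slot.
        System.out.println(availableCompactionSlots(10, 0.1, 100, 0)); // 1
        // 100 workers at a 0.1 ratio, capped at 5; 3 tasks running leave 2 slots.
        System.out.println(availableCompactionSlots(100, 0.1, 5, 3)); // 2
    }
}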

Example 2 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class HttpIndexingServiceClient, method getActiveTasks.

@Override
public List<TaskStatusPlus> getActiveTasks() {
    // Must retrieve waiting, then pending, then running, so if tasks move from one state to the next between
    // calls then we still catch them. (Tasks always go waiting -> pending -> running.)
    // 
    // Consider switching to new-style /druid/indexer/v1/tasks API in the future.
    final List<TaskStatusPlus> tasks = new ArrayList<>();
    final Set<String> taskIdsSeen = new HashSet<>();
    final Iterable<TaskStatusPlus> activeTasks = Iterables.concat(getTasks("waitingTasks"), getTasks("pendingTasks"), getTasks("runningTasks"));
    for (TaskStatusPlus task : activeTasks) {
        // Use the first occurrence of each task only. The same task can appear twice if it moves
        // from one state to the next between our calls (from waiting to pending just as we fetch
        // the lists, for example, and we see it twice.)
        if (taskIdsSeen.add(task.getId())) {
            tasks.add(task);
        }
    }
    return tasks;
}
Also used : ArrayList(java.util.ArrayList) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) HashSet(java.util.HashSet)
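
Callers usually post-process this list; Example 1 above narrows it to compaction tasks with a filterNonCompactionTasks helper before inspecting payloads. Below is a minimal sketch of that kind of filtering. It assumes TaskStatusPlus exposes a nullable getType() and that "compact" matches COMPACTION_TASK_TYPE from Example 1; note that Example 1 itself falls back to the task payload to learn the type, so treat both as assumptions.

import java.util.List;
import java.util.stream.Collectors;
import org.apache.druid.indexer.TaskStatusPlus;

public class ActiveTaskFilterSketch {

    // Sketch only: keep active tasks whose reported type is "compact".
    // getType() may be null, so the constant goes on the left of equals().
    static List<TaskStatusPlus> filterCompactionTasks(List<TaskStatusPlus> activeTasks) {
        return activeTasks.stream()
                .filter(status -> "compact".equals(status.getType()))
                .collect(Collectors.toList());
    }
}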

Example 3 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class TaskMonitor, method start.

public void start(long taskStatusCheckingPeriod) {
    synchronized (startStopLock) {
        running = true;
        log.info("Starting taskMonitor");
        // NOTE: This polling can be improved to event-driven pushing by registering TaskRunnerListener to TaskRunner.
        // That listener should be able to send the events reported to TaskRunner to this TaskMonitor.
        taskStatusChecker.scheduleAtFixedRate(() -> {
            try {
                final Iterator<Entry<String, MonitorEntry>> iterator = runningTasks.entrySet().iterator();
                while (iterator.hasNext()) {
                    final Entry<String, MonitorEntry> entry = iterator.next();
                    final String specId = entry.getKey();
                    final MonitorEntry monitorEntry = entry.getValue();
                    final String taskId = monitorEntry.runningTask.getId();
                    final TaskStatusResponse taskStatusResponse = indexingServiceClient.getTaskStatus(taskId);
                    final TaskStatusPlus taskStatus = taskStatusResponse.getStatus();
                    if (taskStatus != null) {
                        switch (Preconditions.checkNotNull(taskStatus.getStatusCode(), "taskState")) {
                            case SUCCESS:
                                // Succeeded tasks must have sent a report
                                if (!reportsMap.containsKey(taskId)) {
                                    throw new ISE("Missing reports from task[%s]!", taskId);
                                }
                                incrementNumSucceededTasks();
                                // Remove the current entry only after updating taskHistories, to make sure the
                                // task history exists in either runningTasks or taskHistories.
                                monitorEntry.setLastStatus(taskStatus);
                                iterator.remove();
                                break;
                            case FAILED:
                                // We don't need reports from failed tasks
                                reportsMap.remove(taskId);
                                incrementNumFailedTasks();
                                log.warn("task[%s] failed!", taskId);
                                if (monitorEntry.numTries() < maxRetry) {
                                    log.info("We still have more chances[%d/%d] to process the spec[%s].", monitorEntry.numTries(), maxRetry, monitorEntry.spec.getId());
                                    retry(specId, monitorEntry, taskStatus);
                                } else {
                                    log.error("spec[%s] failed after [%d] tries", monitorEntry.spec.getId(), monitorEntry.numTries());
                                    // Remove the current entry only after updating taskHistories, to make sure the
                                    // task history exists in either runningTasks or taskHistories.
                                    monitorEntry.setLastStatus(taskStatus);
                                    iterator.remove();
                                }
                                break;
                            case RUNNING:
                                monitorEntry.updateStatus(taskStatus);
                                break;
                            default:
                                throw new ISE("Unknown taskStatus[%s] for task[%s[", taskStatus.getStatusCode(), taskId);
                        }
                    }
                }
            } catch (Throwable t) {
                // Only log the error here so that task monitoring keeps running; otherwise the
                // task that created this monitor would wait endlessly, assuming its monitored
                // tasks are still running.
                log.error(t, "Error while monitoring");
            }
        }, taskStatusCheckingPeriod, taskStatusCheckingPeriod, TimeUnit.MILLISECONDS);
    }
}
Also used : Entry(java.util.Map.Entry) TaskStatusResponse(org.apache.druid.client.indexing.TaskStatusResponse) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) ISE(org.apache.druid.java.util.common.ISE)
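
The taskStatusChecker used above is a scheduled executor, but its construction is not shown in this excerpt. The following sketch (the class and field names are assumptions) shows the usual setup, and why the catch-all Throwable guard matters: an exception that escapes a scheduleAtFixedRate task cancels all future runs of that task.

import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;

public class StatusPollerSketch {

    // One thread suffices: each tick is a quick sweep over the running-task map.
    private final ScheduledExecutorService taskStatusChecker =
            Executors.newSingleThreadScheduledExecutor();

    public void start(long taskStatusCheckingPeriod) {
        taskStatusChecker.scheduleAtFixedRate(() -> {
            try {
                // ... poll task statuses here, as TaskMonitor.start() does above ...
            } catch (Throwable t) {
                // Log and swallow: if this Throwable escaped, the executor would
                // silently cancel the periodic task and polling would stop forever.
                t.printStackTrace();
            }
        }, taskStatusCheckingPeriod, taskStatusCheckingPeriod, TimeUnit.MILLISECONDS);
    }

    public void stop() {
        taskStatusChecker.shutdownNow();
    }
}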

Example 4 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class ParallelIndexSupervisorTaskKillTest, method testSubTaskFail.

@Test(timeout = 5000L)
public void testSubTaskFail() throws Exception {
    final ParallelIndexSupervisorTask task = newTask(Intervals.of("2017/2018"), new ParallelIndexIOConfig(null, new TestInputSource(Pair.of(new TestInput(10L, TaskState.FAILED), 1), Pair.of(new TestInput(Integer.MAX_VALUE, TaskState.FAILED), 3)), new NoopInputFormat(), false, null));
    final TaskActionClient actionClient = createActionClient(task);
    final TaskToolbox toolbox = createTaskToolbox(task, actionClient);
    prepareTaskForLocking(task);
    Assert.assertTrue(task.isReady(actionClient));
    final TaskStatus taskStatus = task.run(toolbox);
    Assert.assertEquals("Failed in phase[segment generation]. See task logs for details.", taskStatus.getErrorMsg());
    Assert.assertEquals(TaskState.FAILED, taskStatus.getStatusCode());
    final SinglePhaseParallelIndexTaskRunner runner = (SinglePhaseParallelIndexTaskRunner) task.getCurrentRunner();
    Assert.assertTrue(runner.getRunningTaskIds().isEmpty());
    final List<SubTaskSpec<SinglePhaseSubTask>> completeSubTaskSpecs = runner.getCompleteSubTaskSpecs();
    Assert.assertEquals(1, completeSubTaskSpecs.size());
    final TaskHistory<SinglePhaseSubTask> history = runner.getCompleteSubTaskSpecAttemptHistory(completeSubTaskSpecs.get(0).getId());
    Assert.assertNotNull(history);
    Assert.assertEquals(3, history.getAttemptHistory().size());
    for (TaskStatusPlus status : history.getAttemptHistory()) {
        Assert.assertEquals(TaskState.FAILED, status.getStatusCode());
    }
    Assert.assertEquals(3, runner.getTaskMonitor().getNumCanceledTasks());
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) Test(org.junit.Test)
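
The expected attempt-history size of 3 falls out of TaskMonitor's retry rule in Example 3: a failed spec is retried while numTries() < maxRetry, so a spec that always fails accumulates exactly maxRetry attempts. A tiny simulation of that accounting, assuming maxRetry = 3 to match the assertion (the test fixture's actual value is not shown in this excerpt):

public class RetryAccountingSketch {

    public static void main(String[] args) {
        final int maxRetry = 3; // assumption chosen to reproduce the asserted history size
        int numTries = 0;
        boolean givenUp = false;
        while (!givenUp) {
            numTries++; // run one attempt; in this scenario every attempt fails
            if (numTries < maxRetry) {
                // TaskMonitor.retry(): re-submit the spec and keep monitoring
                continue;
            }
            // maxRetry reached: record the last failed status and stop
            givenUp = true;
        }
        System.out.println("attempts = " + numTries); // attempts = 3
    }
}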

Example 5 with TaskStatusPlus

Use of org.apache.druid.indexer.TaskStatusPlus in project druid by druid-io.

From the class ParallelIndexSupervisorTaskResourceTest, method testAPIs.

@Test(timeout = 20000L)
public void testAPIs() throws Exception {
    task = newTask(Intervals.of("2017/2018"), new ParallelIndexIOConfig(null, new TestInputSource(IntStream.range(0, NUM_SUB_TASKS).boxed().collect(Collectors.toList())), new NoopInputFormat(), false, null));
    getIndexingServiceClient().runTask(task.getId(), task);
    Thread.sleep(1000);
    final SinglePhaseParallelIndexTaskRunner runner = (SinglePhaseParallelIndexTaskRunner) task.getCurrentRunner();
    Assert.assertNotNull("runner is null", runner);
    // test getMode
    Response response = task.getMode(newRequest());
    Assert.assertEquals(200, response.getStatus());
    Assert.assertEquals("parallel", response.getEntity());
    // test expectedNumSucceededTasks
    response = task.getProgress(newRequest());
    Assert.assertEquals(200, response.getStatus());
    Assert.assertEquals(NUM_SUB_TASKS, ((ParallelIndexingPhaseProgress) response.getEntity()).getEstimatedExpectedSucceeded());
    // We use polling to check the state of taskMonitor in this test.
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getRunning) < NUM_SUB_TASKS) {
        Thread.sleep(100);
    }
    int succeededTasks = 0;
    int failedTasks = 0;
    checkState(succeededTasks, failedTasks, buildStateMap());
    // numRunningTasks and numSucceededTasks after some successful subTasks
    succeededTasks += 2;
    for (int i = 0; i < succeededTasks; i++) {
        runningTasks.get(0).setState(TaskState.SUCCESS);
    }
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getSucceeded) < succeededTasks) {
        Thread.sleep(100);
    }
    checkState(succeededTasks, failedTasks, buildStateMap());
    // numRunningTasks and numSucceededTasks after some failed subTasks
    failedTasks += 3;
    for (int i = 0; i < failedTasks; i++) {
        runningTasks.get(0).setState(TaskState.FAILED);
    }
    // Wait for new tasks to be started
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getFailed) < failedTasks || runningTasks.size() < NUM_SUB_TASKS - succeededTasks) {
        Thread.sleep(100);
    }
    checkState(succeededTasks, failedTasks, buildStateMap());
    // Make sure only one subTask is running
    succeededTasks += 7;
    for (int i = 0; i < 7; i++) {
        runningTasks.get(0).setState(TaskState.SUCCESS);
    }
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getSucceeded) < succeededTasks) {
        Thread.sleep(100);
    }
    checkState(succeededTasks, failedTasks, buildStateMap());
    Assert.assertEquals(1, runningSpecs.size());
    final String lastRunningSpecId = runningSpecs.keySet().iterator().next();
    final List<TaskStatusPlus> taskHistory = taskHistories.get(lastRunningSpecId);
    // This should be a failed task history because new tasks appear later in runningTasks.
    Assert.assertEquals(1, taskHistory.size());
    // Test one more failure
    runningTasks.get(0).setState(TaskState.FAILED);
    failedTasks++;
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getFailed) < failedTasks) {
        Thread.sleep(100);
    }
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getRunning) < 1) {
        Thread.sleep(100);
    }
    checkState(succeededTasks, failedTasks, buildStateMap());
    Assert.assertEquals(2, taskHistory.size());
    runningTasks.get(0).setState(TaskState.SUCCESS);
    succeededTasks++;
    while (getNumSubTasks(ParallelIndexingPhaseProgress::getSucceeded) < succeededTasks) {
        Thread.sleep(100);
    }
    Assert.assertEquals(TaskState.SUCCESS, getIndexingServiceClient().waitToFinish(task, 1000, TimeUnit.MILLISECONDS).getStatusCode());
}
Also used : Response(javax.ws.rs.core.Response) TaskStatusPlus(org.apache.druid.indexer.TaskStatusPlus) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) Test(org.junit.Test)
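
The many while (...) Thread.sleep(100) loops above rely entirely on the @Test timeout to avoid hanging; an explicit await helper with its own deadline makes the same polling pattern self-documenting. A sketch, not part of the actual test class:

import java.util.concurrent.TimeUnit;
import java.util.function.BooleanSupplier;

public final class AwaitSketch {

    // Poll every 100 ms until the condition holds or the deadline passes.
    public static void await(BooleanSupplier condition, long timeoutMillis) throws InterruptedException {
        final long deadline = System.currentTimeMillis() + timeoutMillis;
        while (!condition.getAsBoolean()) {
            if (System.currentTimeMillis() > deadline) {
                throw new AssertionError("Condition not met within " + timeoutMillis + " ms");
            }
            TimeUnit.MILLISECONDS.sleep(100);
        }
    }
}

The first polling loop in the test could then read, for example, AwaitSketch.await(() -> getNumSubTasks(ParallelIndexingPhaseProgress::getRunning) >= NUM_SUB_TASKS, 10_000).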

Aggregations

TaskStatusPlus (org.apache.druid.indexer.TaskStatusPlus): 21 uses
ImmutableList (com.google.common.collect.ImmutableList): 10 uses
List (java.util.List): 9 uses
Test (org.junit.Test): 9 uses
ArrayList (java.util.ArrayList): 8 uses
Response (javax.ws.rs.core.Response): 7 uses
TaskState (org.apache.druid.indexer.TaskState): 7 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 6 uses
TaskStatus (org.apache.druid.indexer.TaskStatus): 6 uses
HashMap (java.util.HashMap): 5 uses
TaskInfo (org.apache.druid.indexer.TaskInfo): 5 uses
Collection (java.util.Collection): 4 uses
Map (java.util.Map): 4 uses
Collectors (java.util.stream.Collectors): 4 uses
Nullable (javax.annotation.Nullable): 4 uses
Task (org.apache.druid.indexing.common.task.Task): 4 uses
StringUtils (org.apache.druid.java.util.common.StringUtils): 4 uses
Function (com.google.common.base.Function): 3 uses
ResourceFilters (com.sun.jersey.spi.container.ResourceFilters): 3 uses
Collections (java.util.Collections): 3 uses