
Example 1 with TaskStatus

Use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

The class IndexTaskClient, method submitRequest:

/**
 * Sends an HTTP request to the task of the specified {@code taskId} and returns a response if it succeeded.
 */
protected <IntermediateType, FinalType> FinalType submitRequest(
        String taskId,
        @Nullable String mediaType, // nullable if content is empty
        HttpMethod method,
        String encodedPathSuffix,
        @Nullable String encodedQueryString,
        byte[] content,
        HttpResponseHandler<IntermediateType, FinalType> responseHandler,
        boolean retry) throws IOException, ChannelException, NoTaskLocationException {
    final RetryPolicy retryPolicy = retryPolicyFactory.makeRetryPolicy();
    while (true) {
        String path = StringUtils.format("%s/%s/%s", BASE_PATH, StringUtils.urlEncode(taskId), encodedPathSuffix);
        Optional<TaskStatus> status = taskInfoProvider.getTaskStatus(taskId);
        if (!status.isPresent() || !status.get().isRunnable()) {
            throw new TaskNotRunnableException(StringUtils.format("Aborting request because task [%s] is not runnable", taskId));
        }
        final TaskLocation location = taskInfoProvider.getTaskLocation(taskId);
        if (location.equals(TaskLocation.unknown())) {
            throw new NoTaskLocationException(StringUtils.format("No TaskLocation available for task [%s]", taskId));
        }
        final Request request = createRequest(taskId, location, path, encodedQueryString, method, mediaType, content);
        Either<StringFullResponseHolder, FinalType> response = null;
        try {
            // Netty throws some annoying exceptions if a connection can't be opened, which happens relatively frequently
            // for tasks that happen to still be starting up, so test the connection first to keep the logs clean.
            checkConnection(request.getUrl().getHost(), request.getUrl().getPort());
            response = submitRequest(request, responseHandler);
            if (response.isValue()) {
                return response.valueOrThrow();
            } else {
                final StringBuilder exceptionMessage = new StringBuilder();
                final HttpResponseStatus httpResponseStatus = response.error().getStatus();
                final String httpResponseContent = response.error().getContent();
                exceptionMessage.append("Received server error with status [").append(httpResponseStatus).append("]");
                if (!Strings.isNullOrEmpty(httpResponseContent)) {
                    final String choppedMessage = StringUtils.chop(StringUtils.nullToEmptyNonDruidDataString(httpResponseContent), 1000);
                    exceptionMessage.append("; first 1KB of body: ").append(choppedMessage);
                }
                if (httpResponseStatus.getCode() == 400) {
                    // don't bother retrying if it's a bad request
                    throw new IAE(exceptionMessage.toString());
                } else {
                    throw new IOE(exceptionMessage.toString());
                }
            }
        } catch (IOException | ChannelException e) {
            // Since workers are free to move tasks around to different ports, there is a chance that a task may have been
            // moved but our view of its location has not been updated yet from ZK. To detect this case, we send a header
            // identifying our expected recipient in the request; if this doesn't correspond to the worker we messaged, the
            // worker will return an HTTP 404 with its ID in the response header. If we get a mismatching task ID, then
            // we will wait for a short period then retry the request indefinitely, expecting the task's location to
            // eventually be updated.
            final Duration delay;
            if (response != null && !response.isValue() && response.error().getStatus().equals(HttpResponseStatus.NOT_FOUND)) {
                String headerId = StringUtils.urlDecode(response.error().getResponse().headers().get(ChatHandlerResource.TASK_ID_HEADER));
                if (headerId != null && !headerId.equals(taskId)) {
                    log.warn("Expected worker to have taskId [%s] but has taskId [%s], will retry in [%d]s", taskId, headerId, TASK_MISMATCH_RETRY_DELAY_SECONDS);
                    delay = Duration.standardSeconds(TASK_MISMATCH_RETRY_DELAY_SECONDS);
                } else {
                    delay = retryPolicy.getAndIncrementRetryDelay();
                }
            } else {
                delay = retryPolicy.getAndIncrementRetryDelay();
            }
            final String urlForLog = request.getUrl().toString();
            if (!retry) {
                // if retry=false, we probably aren't too concerned if the operation doesn't succeed (i.e. the request was
                // for informational purposes only); log at INFO instead of WARN.
                log.noStackTrace().info(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else if (delay == null) {
                // When retrying, log the final failure at WARN level, since it is likely to be bad news.
                log.warn(e, "submitRequest failed for [%s]", urlForLog);
                throw e;
            } else {
                try {
                    final long sleepTime = delay.getMillis();
                    // When retrying, log non-final failures at INFO level.
                    log.noStackTrace().info(e, "submitRequest failed for [%s]; will try again in [%s]", urlForLog, new Duration(sleepTime).toString());
                    Thread.sleep(sleepTime);
                } catch (InterruptedException e2) {
                    Thread.currentThread().interrupt();
                    e.addSuppressed(e2);
                    throw new RuntimeException(e);
                }
            }
        } catch (NoTaskLocationException e) {
            log.info("No TaskLocation available for task [%s], this task may not have been assigned to a worker yet " + "or may have already completed", taskId);
            throw e;
        } catch (Exception e) {
            log.warn(e, "Exception while sending request");
            throw e;
        }
    }
}
Also used : HttpResponseStatus(org.jboss.netty.handler.codec.http.HttpResponseStatus) Request(org.apache.druid.java.util.http.client.Request) Duration(org.joda.time.Duration) IOException(java.io.IOException) TaskStatus(org.apache.druid.indexer.TaskStatus) IAE(org.apache.druid.java.util.common.IAE) TaskLocation(org.apache.druid.indexer.TaskLocation) MalformedURLException(java.net.MalformedURLException) ChannelException(org.jboss.netty.channel.ChannelException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) ExecutionException(java.util.concurrent.ExecutionException) StringFullResponseHolder(org.apache.druid.java.util.http.client.response.StringFullResponseHolder) IOE(org.apache.druid.java.util.common.IOE)
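
The loop above treats a null delay from RetryPolicy.getAndIncrementRetryDelay() as an exhausted retry budget, and sleeps between attempts otherwise. A minimal, self-contained sketch of that pattern, assuming illustrative names (SimpleRetryPolicy, Call) rather than Druid's actual RetryPolicy API:

import java.io.IOException;
import java.time.Duration;

// Sketch of the retry-with-backoff loop used by submitRequest. A null delay
// from the policy means the retry budget is spent, so the last error surfaces.
final class SimpleRetryPolicy {
    private final int maxRetries;
    private int attempt;

    SimpleRetryPolicy(int maxRetries) {
        this.maxRetries = maxRetries;
    }

    /** Returns the next delay (1s, 2s, 4s, ...), or null once retries are exhausted. */
    Duration getAndIncrementRetryDelay() {
        if (attempt >= maxRetries) {
            return null;
        }
        return Duration.ofSeconds(1L << attempt++);
    }
}

final class RetryingClient {
    interface Call<T> {
        T execute() throws IOException;
    }

    static <T> T callWithRetries(Call<T> call, SimpleRetryPolicy policy) throws IOException, InterruptedException {
        while (true) {
            try {
                return call.execute();
            } catch (IOException e) {
                final Duration delay = policy.getAndIncrementRetryDelay();
                if (delay == null) {
                    throw e; // budget exhausted: rethrow the final failure
                }
                Thread.sleep(delay.toMillis());
            }
        }
    }
}

A caller would wrap the HTTP request in a Call, e.g. callWithRetries(() -> httpGet(url), new SimpleRetryPolicy(5)), where httpGet stands for whatever transport the caller already has.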

Example 2 with TaskStatus

Use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

The class ParallelIndexSupervisorTask, method runSinglePhaseParallel:

/**
 * Run the single phase parallel indexing for best-effort rollup. In this mode, each sub task created by
 * the supervisor task reads data and generates segments individually.
 */
private TaskStatus runSinglePhaseParallel(TaskToolbox toolbox) throws Exception {
    ingestionState = IngestionState.BUILD_SEGMENTS;
    ParallelIndexTaskRunner<SinglePhaseSubTask, PushedSegmentsReport> parallelSinglePhaseRunner = createRunner(toolbox, this::createSinglePhaseTaskRunner);
    final TaskState state = runNextPhase(parallelSinglePhaseRunner);
    TaskStatus taskStatus;
    if (state.isSuccess()) {
        // noinspection ConstantConditions
        publishSegments(toolbox, parallelSinglePhaseRunner.getReports());
        if (awaitSegmentAvailabilityTimeoutMillis > 0) {
            waitForSegmentAvailability(parallelSinglePhaseRunner.getReports());
        }
        taskStatus = TaskStatus.success(getId());
    } else {
        // there is only success or failure after running....
        Preconditions.checkState(state.isFailure(), "Unrecognized state after task is complete[%s]", state);
        final String errorMessage;
        if (parallelSinglePhaseRunner.getStopReason() != null) {
            errorMessage = parallelSinglePhaseRunner.getStopReason();
        } else {
            errorMessage = StringUtils.format(TASK_PHASE_FAILURE_MSG, parallelSinglePhaseRunner.getName());
        }
        taskStatus = TaskStatus.failure(getId(), errorMessage);
    }
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted));
    return taskStatus;
}
Also used : TaskStatus(org.apache.druid.indexer.TaskStatus) TaskState(org.apache.druid.indexer.TaskState)
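
Both branches above converge on a single TaskStatus that is returned and also written into the completion report. A simplified stand-in for org.apache.druid.indexer.TaskStatus, showing just the success/failure factory pattern the snippet relies on (fields and accessors here are assumptions, not the real class):

// Minimal TaskStatus-like value type: a terminal state plus an optional error message.
final class SimpleTaskStatus {
    enum State { SUCCESS, FAILED }

    private final String taskId;
    private final State state;
    private final String errorMessage; // null on success

    private SimpleTaskStatus(String taskId, State state, String errorMessage) {
        this.taskId = taskId;
        this.state = state;
        this.errorMessage = errorMessage;
    }

    static SimpleTaskStatus success(String taskId) {
        return new SimpleTaskStatus(taskId, State.SUCCESS, null);
    }

    static SimpleTaskStatus failure(String taskId, String errorMessage) {
        return new SimpleTaskStatus(taskId, State.FAILED, errorMessage);
    }

    boolean isSuccess() { return state == State.SUCCESS; }
    String getTaskId() { return taskId; }
    String getErrorMessage() { return errorMessage; }
}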

Example 3 with TaskStatus

Use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

The class ParallelIndexSupervisorTask, method runHashPartitionMultiPhaseParallel:

@VisibleForTesting
TaskStatus runHashPartitionMultiPhaseParallel(TaskToolbox toolbox) throws Exception {
    TaskState state;
    ParallelIndexIngestionSpec ingestionSchemaToUse = ingestionSchema;
    if (!(ingestionSchema.getTuningConfig().getPartitionsSpec() instanceof HashedPartitionsSpec)) {
        // only range and hash partitioning are supported for multiphase parallel ingestion, see runMultiPhaseParallel()
        throw new ISE("forceGuaranteedRollup is set but partitionsSpec [%s] is not a single_dim or hash partition spec.", ingestionSchema.getTuningConfig().getPartitionsSpec());
    }
    final Map<Interval, Integer> intervalToNumShards;
    HashedPartitionsSpec partitionsSpec = (HashedPartitionsSpec) ingestionSchema.getTuningConfig().getPartitionsSpec();
    final boolean needsInputSampling = partitionsSpec.getNumShards() == null || ingestionSchemaToUse.getDataSchema().getGranularitySpec().inputIntervals().isEmpty();
    if (needsInputSampling) {
        // 0. need to determine intervals and numShards by scanning the data
        LOG.info("Needs to determine intervals or numShards, beginning %s phase.", PartialDimensionCardinalityTask.TYPE);
        ParallelIndexTaskRunner<PartialDimensionCardinalityTask, DimensionCardinalityReport> cardinalityRunner = createRunner(toolbox, this::createPartialDimensionCardinalityRunner);
        state = runNextPhase(cardinalityRunner);
        if (state.isFailure()) {
            String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, cardinalityRunner.getName());
            return TaskStatus.failure(getId(), errMsg);
        }
        if (cardinalityRunner.getReports().isEmpty()) {
            String msg = "No valid rows for hash partitioning." + " All rows may have invalid timestamps or have been filtered out.";
            LOG.warn(msg);
            return TaskStatus.success(getId(), msg);
        }
        if (partitionsSpec.getNumShards() == null) {
            int effectiveMaxRowsPerSegment = partitionsSpec.getMaxRowsPerSegment() == null ? PartitionsSpec.DEFAULT_MAX_ROWS_PER_SEGMENT : partitionsSpec.getMaxRowsPerSegment();
            LOG.info("effective maxRowsPerSegment is: " + effectiveMaxRowsPerSegment);
            intervalToNumShards = determineNumShardsFromCardinalityReport(cardinalityRunner.getReports().values(), effectiveMaxRowsPerSegment);
        } else {
            intervalToNumShards = CollectionUtils.mapValues(mergeCardinalityReports(cardinalityRunner.getReports().values()), k -> partitionsSpec.getNumShards());
        }
        ingestionSchemaToUse = rewriteIngestionSpecWithIntervalsIfMissing(ingestionSchemaToUse, intervalToNumShards.keySet());
    } else {
        // numShards will be determined in PartialHashSegmentGenerateTask
        intervalToNumShards = null;
    }
    // 1. Partial segment generation phase
    final ParallelIndexIngestionSpec segmentCreateIngestionSpec = ingestionSchemaToUse;
    ParallelIndexTaskRunner<PartialHashSegmentGenerateTask, GeneratedPartitionsReport> indexingRunner = createRunner(toolbox, f -> createPartialHashSegmentGenerateRunner(toolbox, segmentCreateIngestionSpec, intervalToNumShards));
    state = runNextPhase(indexingRunner);
    if (state.isFailure()) {
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, indexingRunner.getName());
        return TaskStatus.failure(getId(), errMsg);
    }
    // 2. Partial segment merge phase
    // partition (interval, partitionId) -> partition locations
    Map<Partition, List<PartitionLocation>> partitionToLocations = getPartitionToLocations(indexingRunner.getReports());
    final List<PartialSegmentMergeIOConfig> ioConfigs = createGenericMergeIOConfigs(ingestionSchema.getTuningConfig().getTotalNumMergeTasks(), partitionToLocations);
    final ParallelIndexIngestionSpec segmentMergeIngestionSpec = ingestionSchemaToUse;
    final ParallelIndexTaskRunner<PartialGenericSegmentMergeTask, PushedSegmentsReport> mergeRunner = createRunner(toolbox, tb -> createPartialGenericSegmentMergeRunner(tb, ioConfigs, segmentMergeIngestionSpec));
    state = runNextPhase(mergeRunner);
    TaskStatus taskStatus;
    if (state.isSuccess()) {
        // noinspection ConstantConditions
        publishSegments(toolbox, mergeRunner.getReports());
        if (awaitSegmentAvailabilityTimeoutMillis > 0) {
            waitForSegmentAvailability(mergeRunner.getReports());
        }
        taskStatus = TaskStatus.success(getId());
    } else {
        // there is only success or failure after running....
        Preconditions.checkState(state.isFailure(), "Unrecognized state after task is complete[%s]", state);
        String errMsg = StringUtils.format(TASK_PHASE_FAILURE_MSG, mergeRunner.getName());
        taskStatus = TaskStatus.failure(getId(), errMsg);
    }
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports(taskStatus, segmentAvailabilityConfirmationCompleted));
    return taskStatus;
}
Also used : HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) ISE(org.apache.druid.java.util.common.ISE) ArrayList(java.util.ArrayList) List(java.util.List) TaskStatus(org.apache.druid.indexer.TaskStatus) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TaskState(org.apache.druid.indexer.TaskState) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
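
When numShards is not given, the cardinality phase reduces to: how many shards does each interval need so that no shard exceeds maxRowsPerSegment? A hedged sketch of that arithmetic over plain per-interval counts; Druid's determineNumShardsFromCardinalityReport merges HLL sketches rather than exact counts, so treat this as an approximation, not the real implementation:

import java.util.HashMap;
import java.util.Map;

// Ceiling division of estimated rows by maxRowsPerSegment, with a floor of one shard.
final class ShardCountEstimator {
    static Map<String, Integer> estimateNumShards(Map<String, Long> intervalToCardinality, int maxRowsPerSegment) {
        final Map<String, Integer> intervalToNumShards = new HashMap<>();
        for (Map.Entry<String, Long> e : intervalToCardinality.entrySet()) {
            final long cardinality = e.getValue();
            final int numShards = (int) Math.max(1L, (cardinality + maxRowsPerSegment - 1) / maxRowsPerSegment);
            intervalToNumShards.put(e.getKey(), numShards);
        }
        return intervalToNumShards;
    }
}

For example, 5,000,001 estimated rows with maxRowsPerSegment = 5,000,000 yields two shards for that interval.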

Example 4 with TaskStatus

Use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

The class ForkingTaskRunner, method run (the QuotableWhiteSpaceSplitter helper used in the snippet is defined in the same file):

@Override
public ListenableFuture<TaskStatus> run(final Task task) {
    synchronized (tasks) {
        tasks.computeIfAbsent(task.getId(), k -> new ForkingTaskRunnerWorkItem(task, exec.submit(new Callable<TaskStatus>() {

            @Override
            public TaskStatus call() {
                final String attemptUUID = UUID.randomUUID().toString();
                final File taskDir = taskConfig.getTaskDir(task.getId());
                final File attemptDir = new File(taskDir, attemptUUID);
                final ProcessHolder processHolder;
                final String childHost = node.getHost();
                int childPort = -1;
                int tlsChildPort = -1;
                if (node.isEnablePlaintextPort()) {
                    childPort = portFinder.findUnusedPort();
                }
                if (node.isEnableTlsPort()) {
                    tlsChildPort = portFinder.findUnusedPort();
                }
                final TaskLocation taskLocation = TaskLocation.create(childHost, childPort, tlsChildPort);
                try {
                    final Closer closer = Closer.create();
                    try {
                        FileUtils.mkdirp(attemptDir);
                        final File taskFile = new File(taskDir, "task.json");
                        final File statusFile = new File(attemptDir, "status.json");
                        final File logFile = new File(taskDir, "log");
                        final File reportsFile = new File(attemptDir, "report.json");
                        // time to adjust process holders
                        synchronized (tasks) {
                            final ForkingTaskRunnerWorkItem taskWorkItem = tasks.get(task.getId());
                            if (taskWorkItem == null) {
                                LOGGER.makeAlert("TaskInfo disappeared!").addData("task", task.getId()).emit();
                                throw new ISE("TaskInfo disappeared for task[%s]!", task.getId());
                            }
                            if (taskWorkItem.shutdown) {
                                throw new IllegalStateException("Task has been shut down!");
                            }
                            if (taskWorkItem.processHolder != null) {
                                LOGGER.makeAlert("TaskInfo already has a processHolder").addData("task", task.getId()).emit();
                                throw new ISE("TaskInfo already has processHolder for task[%s]!", task.getId());
                            }
                            final List<String> command = new ArrayList<>();
                            final String taskClasspath;
                            if (task.getClasspathPrefix() != null && !task.getClasspathPrefix().isEmpty()) {
                                taskClasspath = Joiner.on(File.pathSeparator).join(task.getClasspathPrefix(), config.getClasspath());
                            } else {
                                taskClasspath = config.getClasspath();
                            }
                            command.add(config.getJavaCommand());
                            command.add("-cp");
                            command.add(taskClasspath);
                            Iterables.addAll(command, new QuotableWhiteSpaceSplitter(config.getJavaOpts()));
                            Iterables.addAll(command, config.getJavaOptsArray());
                            // Override task specific javaOpts
                            Object taskJavaOpts = task.getContextValue(ForkingTaskRunnerConfig.JAVA_OPTS_PROPERTY);
                            if (taskJavaOpts != null) {
                                Iterables.addAll(command, new QuotableWhiteSpaceSplitter((String) taskJavaOpts));
                            }
                            for (String propName : props.stringPropertyNames()) {
                                for (String allowedPrefix : config.getAllowedPrefixes()) {
                                    // See https://github.com/apache/druid/issues/1841
                                    if (propName.startsWith(allowedPrefix) && !ForkingTaskRunnerConfig.JAVA_OPTS_PROPERTY.equals(propName) && !ForkingTaskRunnerConfig.JAVA_OPTS_ARRAY_PROPERTY.equals(propName)) {
                                        command.add(StringUtils.format("-D%s=%s", propName, props.getProperty(propName)));
                                    }
                                }
                            }
                            // Override child JVM specific properties
                            for (String propName : props.stringPropertyNames()) {
                                if (propName.startsWith(CHILD_PROPERTY_PREFIX)) {
                                    command.add(StringUtils.format("-D%s=%s", propName.substring(CHILD_PROPERTY_PREFIX.length()), props.getProperty(propName)));
                                }
                            }
                            // Override task specific properties
                            final Map<String, Object> context = task.getContext();
                            if (context != null) {
                                for (String propName : context.keySet()) {
                                    if (propName.startsWith(CHILD_PROPERTY_PREFIX)) {
                                        command.add(StringUtils.format("-D%s=%s", propName.substring(CHILD_PROPERTY_PREFIX.length()), task.getContextValue(propName)));
                                    }
                                }
                            }
                            // Add dataSource, taskId and taskType for metrics or logging
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.DATASOURCE, task.getDataSource()));
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.TASK_ID, task.getId()));
                            command.add(StringUtils.format("-D%s%s=%s", MonitorsConfig.METRIC_DIMENSION_PREFIX, DruidMetrics.TASK_TYPE, task.getType()));
                            command.add(StringUtils.format("-Ddruid.host=%s", childHost));
                            command.add(StringUtils.format("-Ddruid.plaintextPort=%d", childPort));
                            command.add(StringUtils.format("-Ddruid.tlsPort=%d", tlsChildPort));
                            // Let tasks know where they are running on.
                            // This information is used in native parallel indexing with shuffle.
                            command.add(StringUtils.format("-Ddruid.task.executor.service=%s", node.getServiceName()));
                            command.add(StringUtils.format("-Ddruid.task.executor.host=%s", node.getHost()));
                            command.add(StringUtils.format("-Ddruid.task.executor.plaintextPort=%d", node.getPlaintextPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.enablePlaintextPort=%s", node.isEnablePlaintextPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.tlsPort=%d", node.getTlsPort()));
                            command.add(StringUtils.format("-Ddruid.task.executor.enableTlsPort=%s", node.isEnableTlsPort()));
                            // These are not enabled by default, so that users can decide whether to set them.
                            // Users are strongly encouraged to set them in druid.indexer.runner.javaOpts.
                            // See org.apache.druid.concurrent.TaskThreadPriority#getThreadPriorityFromTaskPriority(int)
                            // for more information
                            // command.add("-XX:+UseThreadPriorities");
                            // command.add("-XX:ThreadPriorityPolicy=42");
                            command.add("org.apache.druid.cli.Main");
                            command.add("internal");
                            command.add("peon");
                            command.add(taskFile.toString());
                            command.add(statusFile.toString());
                            command.add(reportsFile.toString());
                            String nodeType = task.getNodeType();
                            if (nodeType != null) {
                                command.add("--nodeType");
                                command.add(nodeType);
                            }
                            // join queries
                            if (task.supportsQueries()) {
                                command.add("--loadBroadcastSegments");
                                command.add("true");
                            }
                            if (!taskFile.exists()) {
                                jsonMapper.writeValue(taskFile, task);
                            }
                            LOGGER.info("Running command: %s", getMaskedCommand(startupLoggingConfig.getMaskProperties(), command));
                            taskWorkItem.processHolder = runTaskProcess(command, logFile, taskLocation);
                            processHolder = taskWorkItem.processHolder;
                            processHolder.registerWithCloser(closer);
                        }
                        TaskRunnerUtils.notifyLocationChanged(listeners, task.getId(), taskLocation);
                        TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), TaskStatus.running(task.getId()));
                        LOGGER.info("Logging task %s output to: %s", task.getId(), logFile);
                        final int exitCode = waitForTaskProcessToComplete(task, processHolder, logFile, reportsFile);
                        final TaskStatus status;
                        if (exitCode == 0) {
                            LOGGER.info("Process exited successfully for task: %s", task.getId());
                            // Process exited successfully
                            status = jsonMapper.readValue(statusFile, TaskStatus.class);
                        } else {
                            LOGGER.error("Process exited with code[%d] for task: %s", exitCode, task.getId());
                            // Process exited unsuccessfully
                            status = TaskStatus.failure(task.getId(), StringUtils.format("Task execution process exited unsuccessfully with code[%s]. See middleManager logs for more details.", exitCode));
                        }
                        TaskRunnerUtils.notifyStatusChanged(listeners, task.getId(), status);
                        return status;
                    } catch (Throwable t) {
                        throw closer.rethrow(t);
                    } finally {
                        closer.close();
                    }
                } catch (Throwable t) {
                    LOGGER.info(t, "Exception caught during execution");
                    throw new RuntimeException(t);
                } finally {
                    try {
                        synchronized (tasks) {
                            final ForkingTaskRunnerWorkItem taskWorkItem = tasks.remove(task.getId());
                            if (taskWorkItem != null && taskWorkItem.processHolder != null) {
                                taskWorkItem.processHolder.shutdown();
                            }
                            if (!stopping) {
                                saveRunningTasks();
                            }
                        }
                        if (node.isEnablePlaintextPort()) {
                            portFinder.markPortUnused(childPort);
                        }
                        if (node.isEnableTlsPort()) {
                            portFinder.markPortUnused(tlsChildPort);
                        }
                        try {
                            if (!stopping && taskDir.exists()) {
                                FileUtils.deleteDirectory(taskDir);
                                LOGGER.info("Removing task directory: %s", taskDir);
                            }
                        } catch (Exception e) {
                            LOGGER.makeAlert(e, "Failed to delete task directory").addData("taskDir", taskDir.toString()).addData("task", task.getId()).emit();
                        }
                    } catch (Exception e) {
                        LOGGER.error(e, "Suppressing exception caught while cleaning up task");
                    }
                }
            }
        })));
        saveRunningTasks();
        return tasks.get(task.getId()).getResult();
    }
}
Also used : Closer(org.apache.druid.java.util.common.io.Closer) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskLocation(org.apache.druid.indexer.TaskLocation) IOException(java.io.IOException) ISE(org.apache.druid.java.util.common.ISE) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) File(java.io.File) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap)
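
Most of run() is spent assembling the peon's command line, and the subtle part is the allowed-prefix loop: only whitelisted system properties may be forwarded into the child JVM. A self-contained sketch of that filtering (the prefix values in the usage note below are illustrative, not Druid's defaults):

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

// Copy properties whose names match an allowed prefix into -D flags for the child JVM.
final class ChildCommandBuilder {
    static List<String> forwardProperties(Properties props, List<String> allowedPrefixes) {
        final List<String> args = new ArrayList<>();
        for (String propName : props.stringPropertyNames()) {
            for (String allowedPrefix : allowedPrefixes) {
                if (propName.startsWith(allowedPrefix)) {
                    args.add(String.format("-D%s=%s", propName, props.getProperty(propName)));
                    break; // add each property once even if prefixes overlap
                }
            }
        }
        return args;
    }
}

For instance, forwardProperties(System.getProperties(), List.of("druid.", "hadoop.")) would pass every druid.* and hadoop.* property through to the forked process.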

Example 5 with TaskStatus

Use of org.apache.druid.indexer.TaskStatus in project druid by druid-io.

The class PartialSegmentMergeTask, method runTask:

@Override
public TaskStatus runTask(TaskToolbox toolbox) throws Exception {
    // Group partitionLocations by interval and partitionId
    final Map<Interval, Int2ObjectMap<List<PartitionLocation>>> intervalToBuckets = new HashMap<>();
    for (PartitionLocation location : ioConfig.getPartitionLocations()) {
        intervalToBuckets
                .computeIfAbsent(location.getInterval(), k -> new Int2ObjectOpenHashMap<>())
                .computeIfAbsent(location.getBucketId(), k -> new ArrayList<>())
                .add(location);
    }
    final List<TaskLock> locks = toolbox.getTaskActionClient().submit(new SurrogateAction<>(supervisorTaskId, new LockListAction()));
    final Map<Interval, String> intervalToVersion = Maps.newHashMapWithExpectedSize(locks.size());
    locks.forEach(lock -> {
        if (lock.isRevoked()) {
            throw new ISE("Lock[%s] is revoked", lock);
        }
        final String mustBeNull = intervalToVersion.put(lock.getInterval(), lock.getVersion());
        if (mustBeNull != null) {
            throw new ISE("Unexpected state: Two versions([%s], [%s]) for the same interval[%s]", lock.getVersion(), mustBeNull, lock.getInterval());
        }
    });
    final Stopwatch fetchStopwatch = Stopwatch.createStarted();
    final Map<Interval, Int2ObjectMap<List<File>>> intervalToUnzippedFiles = fetchSegmentFiles(toolbox, intervalToBuckets);
    final long fetchTime = fetchStopwatch.elapsed(TimeUnit.SECONDS);
    fetchStopwatch.stop();
    LOG.info("Fetch took [%s] seconds", fetchTime);
    final ParallelIndexSupervisorTaskClient taskClient = toolbox.getSupervisorTaskClientFactory().build(
            new ClientBasedTaskInfoProvider(toolbox.getIndexingServiceClient()),
            getId(),
            1, // always use a single http thread
            getTuningConfig().getChatHandlerTimeout(),
            getTuningConfig().getChatHandlerNumRetries()
    );
    final File persistDir = toolbox.getPersistDir();
    org.apache.commons.io.FileUtils.deleteQuietly(persistDir);
    FileUtils.mkdirp(persistDir);
    final Set<DataSegment> pushedSegments = mergeAndPushSegments(toolbox, getDataSchema(), getTuningConfig(), persistDir, intervalToVersion, intervalToUnzippedFiles);
    taskClient.report(supervisorTaskId, new PushedSegmentsReport(getId(), Collections.emptySet(), pushedSegments, ImmutableMap.of()));
    return TaskStatus.success(getId());
}
Also used : LockListAction(org.apache.druid.indexing.common.actions.LockListAction) HashMap(java.util.HashMap) Int2ObjectOpenHashMap(it.unimi.dsi.fastutil.ints.Int2ObjectOpenHashMap) ArrayList(java.util.ArrayList) Stopwatch(com.google.common.base.Stopwatch) Int2ObjectMap(it.unimi.dsi.fastutil.ints.Int2ObjectMap) DataSegment(org.apache.druid.timeline.DataSegment) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) ClientBasedTaskInfoProvider(org.apache.druid.indexing.common.task.ClientBasedTaskInfoProvider) File(java.io.File) Interval(org.joda.time.Interval) TaskStatus(org.apache.druid.indexer.TaskStatus)
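
The opening loop of runTask builds a two-level index with nested computeIfAbsent calls. The same shape in miniature, with a plain string and int standing in for Druid's Interval and bucket id (Location and its fields are invented for illustration):

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

// Group locations by interval, then by bucket id, creating containers on demand.
final class PartitionGrouping {
    record Location(String interval, int bucketId, String path) {}

    static Map<String, Map<Integer, List<Location>>> groupByIntervalAndBucket(List<Location> locations) {
        final Map<String, Map<Integer, List<Location>>> grouped = new HashMap<>();
        for (Location location : locations) {
            grouped
                    .computeIfAbsent(location.interval(), k -> new HashMap<>())
                    .computeIfAbsent(location.bucketId(), k -> new ArrayList<>())
                    .add(location);
        }
        return grouped;
    }
}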

Aggregations

TaskStatus (org.apache.druid.indexer.TaskStatus): 135 usages
Test (org.junit.Test): 103 usages
DataSegment (org.apache.druid.timeline.DataSegment): 55 usages
List (java.util.List): 50 usages
ImmutableList (com.google.common.collect.ImmutableList): 44 usages
ArrayList (java.util.ArrayList): 41 usages
TaskToolbox (org.apache.druid.indexing.common.TaskToolbox): 40 usages
Task (org.apache.druid.indexing.common.task.Task): 39 usages
InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest): 37 usages
Map (java.util.Map): 34 usages
File (java.io.File): 32 usages
IOException (java.io.IOException): 26 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 25 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 25 usages
SegmentDescriptor (org.apache.druid.query.SegmentDescriptor): 25 usages
DataSchema (org.apache.druid.segment.indexing.DataSchema): 25 usages
ISE (org.apache.druid.java.util.common.ISE): 24 usages
HashMap (java.util.HashMap): 23 usages
Executor (java.util.concurrent.Executor): 23 usages
Pair (org.apache.druid.java.util.common.Pair): 23 usages