Search in sources :

Example 1 with IngestionState

use of org.apache.druid.indexer.IngestionState in project druid by druid-io.

the class ParallelIndexSupervisorTask method doGetLiveReports.

@VisibleForTesting
public Map<String, Object> doGetLiveReports(String full) {
    Map<String, Object> returnMap = new HashMap<>();
    Map<String, Object> ingestionStatsAndErrors = new HashMap<>();
    Map<String, Object> payload = new HashMap<>();
    Pair<Map<String, Object>, Map<String, Object>> rowStatsAndUnparsebleEvents = doGetRowStatsAndUnparseableEvents(full, true);
    // use the sequential task's ingestion state if we were running that mode
    IngestionState ingestionStateForReport;
    if (isParallelMode()) {
        ingestionStateForReport = ingestionState;
    } else {
        IndexTask currentSequentialTask = (IndexTask) currentSubTaskHolder.getTask();
        ingestionStateForReport = currentSequentialTask == null ? ingestionState : currentSequentialTask.getIngestionState();
    }
    payload.put("ingestionState", ingestionStateForReport);
    payload.put("unparseableEvents", rowStatsAndUnparsebleEvents.rhs);
    payload.put("rowStats", rowStatsAndUnparsebleEvents.lhs);
    ingestionStatsAndErrors.put("taskId", getId());
    ingestionStatsAndErrors.put("payload", payload);
    ingestionStatsAndErrors.put("type", "ingestionStatsAndErrors");
    returnMap.put("ingestionStatsAndErrors", ingestionStatsAndErrors);
    return returnMap;
}
Also used : HashMap(java.util.HashMap) IngestionState(org.apache.druid.indexer.IngestionState) AbstractBatchIndexTask(org.apache.druid.indexing.common.task.AbstractBatchIndexTask) IndexTask(org.apache.druid.indexing.common.task.IndexTask) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) VisibleForTesting(com.google.common.annotations.VisibleForTesting)

Example 2 with IngestionState

use of org.apache.druid.indexer.IngestionState in project druid by druid-io.

the class IndexTask method generateAndPublishSegments.

/**
 * This method reads input data row by row and adds the read row to a proper segment using {@link BaseAppenderatorDriver}.
 * If there is no segment for the row, a new one is created.  Segments can be published in the middle of reading inputs
 * if {@link DynamicPartitionsSpec} is used and one of below conditions are satisfied.
 *
 * <ul>
 * <li>
 * If the number of rows in a segment exceeds {@link DynamicPartitionsSpec#maxRowsPerSegment}
 * </li>
 * <li>
 * If the number of rows added to {@link BaseAppenderatorDriver} so far exceeds {@link DynamicPartitionsSpec#maxTotalRows}
 * </li>
 * </ul>
 * <p>
 * At the end of this method, all the remaining segments are published.
 *
 * @return the last {@link TaskStatus}
 */
private TaskStatus generateAndPublishSegments(final TaskToolbox toolbox, final DataSchema dataSchema, final InputSource inputSource, final File tmpDir, final PartitionAnalysis partitionAnalysis) throws IOException, InterruptedException {
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    FireDepartmentMetrics buildSegmentsFireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    if (toolbox.getMonitorScheduler() != null) {
        final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, buildSegmentsMeters);
        toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
    }
    final PartitionsSpec partitionsSpec = partitionAnalysis.getPartitionsSpec();
    final IndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
    final long pushTimeout = tuningConfig.getPushTimeout();
    final SegmentAllocatorForBatch segmentAllocator;
    final SequenceNameFunction sequenceNameFunction;
    switch(partitionsSpec.getType()) {
        case HASH:
        case RANGE:
            final SegmentAllocatorForBatch localSegmentAllocator = SegmentAllocators.forNonLinearPartitioning(toolbox, getDataSource(), baseSequenceName, dataSchema.getGranularitySpec(), null, (CompletePartitionAnalysis) partitionAnalysis);
            sequenceNameFunction = localSegmentAllocator.getSequenceNameFunction();
            segmentAllocator = localSegmentAllocator;
            break;
        case LINEAR:
            segmentAllocator = SegmentAllocators.forLinearPartitioning(toolbox, baseSequenceName, null, dataSchema, getTaskLockHelper(), ingestionSchema.getIOConfig().isAppendToExisting(), partitionAnalysis.getPartitionsSpec(), null);
            sequenceNameFunction = segmentAllocator.getSequenceNameFunction();
            break;
        default:
            throw new UOE("[%s] secondary partition type is not supported", partitionsSpec.getType());
    }
    Set<DataSegment> segmentsFoundForDrop = null;
    if (ingestionSchema.getIOConfig().isDropExisting()) {
        segmentsFoundForDrop = getUsedSegmentsWithinInterval(toolbox, getDataSource(), ingestionSchema.getDataSchema().getGranularitySpec().inputIntervals());
    }
    final TransactionalSegmentPublisher publisher = (segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish, commitMetadata) -> toolbox.getTaskActionClient().submit(SegmentTransactionalInsertAction.overwriteAction(segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish));
    String effectiveId = getContextValue(CompactionTask.CTX_KEY_APPENDERATOR_TRACKING_TASK_ID, null);
    if (effectiveId == null) {
        effectiveId = getId();
    }
    final Appenderator appenderator = BatchAppenderators.newAppenderator(effectiveId, toolbox.getAppenderatorsManager(), buildSegmentsFireDepartmentMetrics, toolbox, dataSchema, tuningConfig, buildSegmentsMeters, buildSegmentsParseExceptionHandler, isUseMaxMemoryEstimates());
    boolean exceptionOccurred = false;
    try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) {
        driver.startJob();
        InputSourceProcessor.process(dataSchema, driver, partitionsSpec, inputSource, inputSource.needsFormat() ? getInputFormat(ingestionSchema) : null, tmpDir, sequenceNameFunction, new DefaultIndexTaskInputRowIteratorBuilder(), buildSegmentsMeters, buildSegmentsParseExceptionHandler, pushTimeout);
        // If we use timeChunk lock, then we don't have to specify what segments will be overwritten because
        // it will just overwrite all segments overlapped with the new segments.
        final Set<DataSegment> inputSegments = getTaskLockHelper().isUseSegmentLock() ? getTaskLockHelper().getLockedExistingSegments() : null;
        final boolean storeCompactionState = getContextValue(Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE);
        final Function<Set<DataSegment>, Set<DataSegment>> annotateFunction = compactionStateAnnotateFunction(storeCompactionState, toolbox, ingestionSchema);
        // Probably we can publish atomicUpdateGroup along with segments.
        final SegmentsAndCommitMetadata published = awaitPublish(driver.publishAll(inputSegments, segmentsFoundForDrop, publisher, annotateFunction), pushTimeout);
        appenderator.close();
        // for awaitSegmentAvailabilityTimeoutMillis
        if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0 && published != null) {
            ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT;
            ArrayList<DataSegment> segmentsToWaitFor = new ArrayList<>(published.getSegments());
            waitForSegmentAvailability(toolbox, segmentsToWaitFor, tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis());
        }
        ingestionState = IngestionState.COMPLETED;
        if (published == null) {
            log.error("Failed to publish segments, aborting!");
            errorMsg = "Failed to publish segments.";
            toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
            return TaskStatus.failure(getId(), errorMsg);
        } else {
            log.info("Processed[%,d] events, unparseable[%,d], thrownAway[%,d].", buildSegmentsMeters.getProcessed(), buildSegmentsMeters.getUnparseable(), buildSegmentsMeters.getThrownAway());
            log.info("Published [%s] segments", published.getSegments().size());
            log.debugSegments(published.getSegments(), "Published segments");
            toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
            return TaskStatus.success(getId());
        }
    } catch (TimeoutException | ExecutionException e) {
        exceptionOccurred = true;
        throw new RuntimeException(e);
    } catch (Exception e) {
        exceptionOccurred = true;
        throw e;
    } finally {
        if (exceptionOccurred) {
            appenderator.closeNow();
        } else {
            appenderator.close();
        }
    }
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) BatchAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) Comparators(org.apache.druid.java.util.common.guava.Comparators) Produces(javax.ws.rs.Produces) IndexSpec(org.apache.druid.segment.IndexSpec) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) IngestionState(org.apache.druid.indexer.IngestionState) CompletePartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.CompletePartitionAnalysis) MediaType(javax.ws.rs.core.MediaType) JodaUtils(org.apache.druid.java.util.common.JodaUtils) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) Optional(com.google.common.base.Optional) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) FiniteFirehoseFactory(org.apache.druid.data.input.FiniteFirehoseFactory) Map(java.util.Map) IAE(org.apache.druid.java.util.common.IAE) LinearPartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.LinearPartitionAnalysis) Property(org.apache.druid.indexer.Property) InputSourceSampler(org.apache.druid.indexing.overlord.sampler.InputSourceSampler) InputFormat(org.apache.druid.data.input.InputFormat) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) BaseAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BaseAppenderatorDriver) FirehoseFactoryToInputSourceAdaptor(org.apache.druid.data.input.FirehoseFactoryToInputSourceAdaptor) Granularity(org.apache.druid.java.util.common.granularity.Granularity) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) AppenderatorConfig(org.apache.druid.segment.realtime.appenderator.AppenderatorConfig) GET(javax.ws.rs.GET) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) Rows(org.apache.druid.data.input.Rows) SegmentWriteOutMediumFactory(org.apache.druid.segment.writeout.SegmentWriteOutMediumFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) HttpServletRequest(javax.servlet.http.HttpServletRequest) UOE(org.apache.druid.java.util.common.UOE) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) IndexMerger(org.apache.druid.segment.IndexMerger) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) Throwables(com.google.common.base.Throwables) Include(com.fasterxml.jackson.annotation.JsonInclude.Include) PartialHashSegmentGenerateTask(org.apache.druid.indexing.common.task.batch.parallel.PartialHashSegmentGenerateTask) IOException(java.io.IOException) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TreeMap(java.util.TreeMap) AppendableIndexSpec(org.apache.druid.segment.incremental.AppendableIndexSpec) Preconditions(com.google.common.base.Preconditions) DataSchema(org.apache.druid.segment.indexing.DataSchema) ArbitraryGranularitySpec(org.apache.druid.segment.indexing.granularity.ArbitraryGranularitySpec) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) HashPartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.HashPartitionAnalysis) PartitionAnalysis(org.apache.druid.indexing.common.task.batch.partition.PartitionAnalysis) TimeoutException(java.util.concurrent.TimeoutException) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) QueryParam(javax.ws.rs.QueryParam) DefaultIndexTaskInputRowIteratorBuilder(org.apache.druid.indexing.common.task.batch.parallel.iterator.DefaultIndexTaskInputRowIteratorBuilder) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) Context(javax.ws.rs.core.Context) Predicate(java.util.function.Predicate) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) IngestionSpec(org.apache.druid.segment.indexing.IngestionSpec) Objects(java.util.Objects) List(java.util.List) Response(javax.ws.rs.core.Response) DataSegment(org.apache.druid.timeline.DataSegment) HashFunction(com.google.common.hash.HashFunction) Logger(org.apache.druid.java.util.common.logger.Logger) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Hashing(com.google.common.hash.Hashing) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) Function(java.util.function.Function) TuningConfig(org.apache.druid.segment.indexing.TuningConfig) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) JsonTypeName(com.fasterxml.jackson.annotation.JsonTypeName) InputSource(org.apache.druid.data.input.InputSource) ImmutableList(com.google.common.collect.ImmutableList) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) Nonnull(javax.annotation.Nonnull) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) BatchIOConfig(org.apache.druid.segment.indexing.BatchIOConfig) SecondaryPartitionType(org.apache.druid.indexer.partitions.SecondaryPartitionType) Period(org.joda.time.Period) HyperLogLogCollector(org.apache.druid.hll.HyperLogLogCollector) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) CircularBuffer(org.apache.druid.utils.CircularBuffer) TimeUnit(java.util.concurrent.TimeUnit) Checks(org.apache.druid.indexer.Checks) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Collections(java.util.Collections) DefaultIndexTaskInputRowIteratorBuilder(org.apache.druid.indexing.common.task.batch.parallel.iterator.DefaultIndexTaskInputRowIteratorBuilder) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Set(java.util.Set) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) BatchAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver) UOE(org.apache.druid.java.util.common.UOE) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec)

Example 3 with IngestionState

use of org.apache.druid.indexer.IngestionState in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) {
    runThread = Thread.currentThread();
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
    setupTimeoutAlert();
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
    this.metrics = fireDepartmentForMetrics.getMetrics();
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
    appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
    TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
    StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
    try {
        log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        driver.startJob(segmentId -> {
            try {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        int sequenceNumber = 0;
        String sequenceName = makeSequenceName(getId(), sequenceNumber);
        final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
            if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
            }
            if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
            }
            final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
            return toolbox.getTaskActionClient().submit(action);
        };
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Time to read data!
        while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
            try {
                InputRow inputRow = firehose.nextRow();
                if (inputRow == null) {
                    log.debug("Discarded null row, considering thrownAway.");
                    rowIngestionMeters.incrementThrownAway();
                } else {
                    AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
                    if (addResult.isOk()) {
                        final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                        if (isPushRequired) {
                            publishSegments(driver, publisher, committerSupplier, sequenceName);
                            sequenceNumber++;
                            sequenceName = makeSequenceName(getId(), sequenceNumber);
                        }
                    } else {
                        // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                }
            } catch (ParseException e) {
                handleParseException(e);
            }
        }
        ingestionState = IngestionState.COMPLETED;
        if (!gracefullyStopped) {
            synchronized (this) {
                if (gracefullyStopped) {
                    // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                    log.info("Gracefully stopping.");
                } else {
                    finishingJob = true;
                }
            }
            if (finishingJob) {
                log.info("Finishing job...");
                // Publish any remaining segments
                publishSegments(driver, publisher, committerSupplier, sequenceName);
                waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
            }
        } else if (firehose != null) {
            log.info("Task was gracefully stopped, will persist data before exiting");
            persistAndWait(driver, committerSupplier.get());
        }
    } catch (Throwable e) {
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
        CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
        appenderator.close();
        CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
        toolbox.removeMonitor(metricsMonitor);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }
    }
    log.info("Job done!");
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.success(getId());
}
Also used : StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LookupNodeService(org.apache.druid.discovery.LookupNodeService) Produces(javax.ws.rs.Produces) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) TimeoutException(java.util.concurrent.TimeoutException) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Timer(java.util.Timer) IngestionState(org.apache.druid.indexer.IngestionState) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) MediaType(javax.ws.rs.core.MediaType) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) QueryRunner(org.apache.druid.query.QueryRunner) TimerTask(java.util.TimerTask) DateTimes(org.apache.druid.java.util.common.DateTimes) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) Context(javax.ws.rs.core.Context) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec) ImmutableMap(com.google.common.collect.ImmutableMap) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) CountDownLatch(java.util.concurrent.CountDownLatch) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) Response(javax.ws.rs.core.Response) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TaskLockType(org.apache.druid.indexing.common.TaskLockType) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) NodeRole(org.apache.druid.discovery.NodeRole) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Queue(java.util.Queue) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ActionBasedSegmentAllocator(org.apache.druid.indexing.appenderator.ActionBasedSegmentAllocator) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) GET(javax.ws.rs.GET) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) HttpServletRequest(javax.servlet.http.HttpServletRequest) Query(org.apache.druid.query.Query) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) ActionBasedUsedSegmentChecker(org.apache.druid.indexing.appenderator.ActionBasedUsedSegmentChecker) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) Throwables(com.google.common.base.Throwables) Committers(org.apache.druid.segment.realtime.plumber.Committers) IOException(java.io.IOException) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) SegmentUtils(org.apache.druid.segment.SegmentUtils) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) VisibleForTesting(com.google.common.annotations.VisibleForTesting) StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) DataSchema(org.apache.druid.segment.indexing.DataSchema) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) TaskLock(org.apache.druid.indexing.common.TaskLock) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) ISE(org.apache.druid.java.util.common.ISE) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) IOException(java.io.IOException) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) TaskLockType(org.apache.druid.indexing.common.TaskLockType) InputRow(org.apache.druid.data.input.InputRow) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) Committer(org.apache.druid.data.input.Committer) ParseException(org.apache.druid.java.util.common.parsers.ParseException)

Example 4 with IngestionState

use of org.apache.druid.indexer.IngestionState in project druid by druid-io.

the class SeekableStreamIndexTaskRunner method runInternal.

private TaskStatus runInternal(TaskToolbox toolbox) throws Exception {
    startTime = DateTimes.nowUtc();
    status = Status.STARTING;
    setToolbox(toolbox);
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), tuningConfig.getMaxSavedParseExceptions());
    // Now we can initialize StreamChunkReader with the given toolbox.
    final StreamChunkParser parser = new StreamChunkParser<RecordType>(this.parser, inputFormat, inputRowSchema, task.getDataSchema().getTransformSpec(), toolbox.getIndexingTmpDir(), row -> row != null && task.withinMinMaxRecordTime(row), rowIngestionMeters, parseExceptionHandler);
    initializeSequences();
    log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
    toolbox.getChatHandlerProvider().register(task.getId(), this, false);
    runThread = Thread.currentThread();
    // Set up FireDepartmentMetrics
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(task.getDataSchema(), new RealtimeIOConfig(null, null), null);
    this.fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    toolbox.addMonitor(TaskRealtimeMetricsMonitorBuilder.build(task, fireDepartmentForMetrics, rowIngestionMeters));
    final String lookupTier = task.getContextValue(RealtimeIndexTask.CTX_KEY_LOOKUP_TIER);
    final LookupNodeService lookupNodeService = lookupTier == null ? toolbox.getLookupNodeService() : new LookupNodeService(lookupTier);
    final DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
    Throwable caughtExceptionOuter = null;
    // milliseconds waited for created segments to be handed off
    long handoffWaitMs = 0L;
    try (final RecordSupplier<PartitionIdType, SequenceOffsetType, RecordType> recordSupplier = task.newTaskRecordSupplier()) {
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        appenderator = task.newAppenderator(toolbox, fireDepartmentMetrics, rowIngestionMeters, parseExceptionHandler);
        driver = task.newDriver(appenderator, toolbox, fireDepartmentMetrics);
        // Start up, set up initial sequences.
        final Object restoredMetadata = driver.startJob(segmentId -> {
            try {
                if (lockGranularityToUse == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        if (restoredMetadata == null) {
            // no persist has happened so far
            // so either this is a brand new task or replacement of a failed task
            Preconditions.checkState(sequences.get(0).startOffsets.entrySet().stream().allMatch(partitionOffsetEntry -> createSequenceNumber(partitionOffsetEntry.getValue()).compareTo(createSequenceNumber(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(partitionOffsetEntry.getKey()))) >= 0), "Sequence sequences are not compatible with start sequences of task");
            currOffsets.putAll(sequences.get(0).startOffsets);
        } else {
            @SuppressWarnings("unchecked") final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
            final SeekableStreamEndSequenceNumbers<PartitionIdType, SequenceOffsetType> restoredNextPartitions = deserializePartitionsFromMetadata(toolbox.getJsonMapper(), restoredMetadataMap.get(METADATA_NEXT_PARTITIONS));
            currOffsets.putAll(restoredNextPartitions.getPartitionSequenceNumberMap());
            // Sanity checks.
            if (!restoredNextPartitions.getStream().equals(ioConfig.getStartSequenceNumbers().getStream())) {
                throw new ISE("Restored stream[%s] but expected stream[%s]", restoredNextPartitions.getStream(), ioConfig.getStartSequenceNumbers().getStream());
            }
            if (!currOffsets.keySet().equals(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet())) {
                throw new ISE("Restored partitions[%s] but expected partitions[%s]", currOffsets.keySet(), ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().keySet());
            }
            // which is super rare
            if (sequences.size() == 0 || getLastSequenceMetadata().isCheckpointed()) {
                this.endOffsets.putAll(sequences.size() == 0 ? currOffsets : getLastSequenceMetadata().getEndOffsets());
            }
        }
        log.info("Initialized sequences: %s", sequences.stream().map(SequenceMetadata::toString).collect(Collectors.joining(", ")));
        // Filter out partitions with END_OF_SHARD markers since these partitions have already been fully read. This
        // should have been done by the supervisor already so this is defensive.
        int numPreFilterPartitions = currOffsets.size();
        if (currOffsets.entrySet().removeIf(x -> isEndOfShard(x.getValue()))) {
            log.info("Removed [%d] partitions from assignment which have already been closed.", numPreFilterPartitions - currOffsets.size());
        }
        // When end offsets are exclusive, we never skip the start record.
        if (!isEndOffsetExclusive()) {
            for (Map.Entry<PartitionIdType, SequenceOffsetType> entry : currOffsets.entrySet()) {
                final boolean isAtStart = entry.getValue().equals(ioConfig.getStartSequenceNumbers().getPartitionSequenceNumberMap().get(entry.getKey()));
                if (!isAtStart || ioConfig.getStartSequenceNumbers().getExclusivePartitions().contains(entry.getKey())) {
                    lastReadOffsets.put(entry.getKey(), entry.getValue());
                }
            }
        }
        // Set up committer.
        final Supplier<Committer> committerSupplier = () -> {
            final Map<PartitionIdType, SequenceOffsetType> snapshot = ImmutableMap.copyOf(currOffsets);
            lastPersistedOffsets.clear();
            lastPersistedOffsets.putAll(snapshot);
            return new Committer() {

                @Override
                public Object getMetadata() {
                    return ImmutableMap.of(METADATA_NEXT_PARTITIONS, new SeekableStreamEndSequenceNumbers<>(stream, snapshot));
                }

                @Override
                public void run() {
                // Do nothing.
                }
            };
        };
        // restart publishing of sequences (if any)
        maybePersistAndPublishSequences(committerSupplier);
        Set<StreamPartition<PartitionIdType>> assignment = assignPartitions(recordSupplier);
        possiblyResetDataSourceMetadata(toolbox, recordSupplier, assignment);
        seekToStartingSequence(recordSupplier, assignment);
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Main loop.
        // Could eventually support leader/follower mode (for keeping replicas more in sync)
        boolean stillReading = !assignment.isEmpty();
        status = Status.READING;
        Throwable caughtExceptionInner = null;
        try {
            while (stillReading) {
                if (possiblyPause()) {
                    // The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
                    // partitions upon resuming. Don't call "seekToStartingSequence" after "assignPartitions", because there's
                    // no need to re-seek here. All we're going to be doing is dropping partitions.
                    assignment = assignPartitions(recordSupplier);
                    possiblyResetDataSourceMetadata(toolbox, recordSupplier, assignment);
                    if (assignment.isEmpty()) {
                        log.debug("All partitions have been fully read.");
                        publishOnStop.set(true);
                        stopRequested.set(true);
                    }
                }
                // if stop is requested or task's end sequence is set by call to setEndOffsets method with finish set to true
                if (stopRequested.get() || sequences.size() == 0 || getLastSequenceMetadata().isCheckpointed()) {
                    status = Status.PUBLISHING;
                }
                if (stopRequested.get()) {
                    break;
                }
                if (backgroundThreadException != null) {
                    throw new RuntimeException(backgroundThreadException);
                }
                checkPublishAndHandoffFailure();
                maybePersistAndPublishSequences(committerSupplier);
                // calling getRecord() ensures that exceptions specific to kafka/kinesis like OffsetOutOfRangeException
                // are handled in the subclasses.
                List<OrderedPartitionableRecord<PartitionIdType, SequenceOffsetType, RecordType>> records = getRecords(recordSupplier, toolbox);
                // note: getRecords() also updates assignment
                stillReading = !assignment.isEmpty();
                SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceToCheckpoint = null;
                for (OrderedPartitionableRecord<PartitionIdType, SequenceOffsetType, RecordType> record : records) {
                    final boolean shouldProcess = verifyRecordInRange(record.getPartitionId(), record.getSequenceNumber());
                    log.trace("Got stream[%s] partition[%s] sequenceNumber[%s], shouldProcess[%s].", record.getStream(), record.getPartitionId(), record.getSequenceNumber(), shouldProcess);
                    if (shouldProcess) {
                        final List<InputRow> rows = parser.parse(record.getData(), isEndOfShard(record.getSequenceNumber()));
                        boolean isPersistRequired = false;
                        final SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceToUse = sequences.stream().filter(sequenceMetadata -> sequenceMetadata.canHandle(this, record)).findFirst().orElse(null);
                        if (sequenceToUse == null) {
                            throw new ISE("Cannot find any valid sequence for record with partition [%s] and sequenceNumber [%s]. Current sequences: %s", record.getPartitionId(), record.getSequenceNumber(), sequences);
                        }
                        for (InputRow row : rows) {
                            final AppenderatorDriverAddResult addResult = driver.add(row, sequenceToUse.getSequenceName(), committerSupplier, true, // of rows are indexed
                            false);
                            if (addResult.isOk()) {
                                // If the number of rows in the segment exceeds the threshold after adding a row,
                                // move the segment out from the active segments of BaseAppenderatorDriver to make a new segment.
                                final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                                if (isPushRequired && !sequenceToUse.isCheckpointed()) {
                                    sequenceToCheckpoint = sequenceToUse;
                                }
                                isPersistRequired |= addResult.isPersistRequired();
                            } else {
                                // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                                throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
                            }
                        }
                        if (isPersistRequired) {
                            Futures.addCallback(driver.persistAsync(committerSupplier.get()), new FutureCallback<Object>() {

                                @Override
                                public void onSuccess(@Nullable Object result) {
                                    log.debug("Persist completed with metadata: %s", result);
                                }

                                @Override
                                public void onFailure(Throwable t) {
                                    log.error("Persist failed, dying");
                                    backgroundThreadException = t;
                                }
                            });
                        }
                        // in kafka, we can easily get the next offset by adding 1, but for kinesis, there's no way
                        // to get the next sequence number without having to make an expensive api call. So the behavior
                        // here for kafka is to +1 while for kinesis we simply save the current sequence number
                        lastReadOffsets.put(record.getPartitionId(), record.getSequenceNumber());
                        currOffsets.put(record.getPartitionId(), getNextStartOffset(record.getSequenceNumber()));
                    }
                    // Use record.getSequenceNumber() in the moreToRead check, since currOffsets might not have been
                    // updated if we were skipping records for being beyond the end.
                    final boolean moreToReadAfterThisRecord = isMoreToReadAfterReadingRecord(record.getSequenceNumber(), endOffsets.get(record.getPartitionId()));
                    if (!moreToReadAfterThisRecord && assignment.remove(record.getStreamPartition())) {
                        log.info("Finished reading stream[%s], partition[%s].", record.getStream(), record.getPartitionId());
                        recordSupplier.assign(assignment);
                        stillReading = !assignment.isEmpty();
                    }
                }
                if (!stillReading) {
                    // We let the fireDepartmentMetrics know that all messages have been read. This way, some metrics such as
                    // high message gap need not be reported
                    fireDepartmentMetrics.markProcessingDone();
                }
                if (System.currentTimeMillis() > nextCheckpointTime) {
                    sequenceToCheckpoint = getLastSequenceMetadata();
                }
                if (sequenceToCheckpoint != null && stillReading) {
                    Preconditions.checkArgument(getLastSequenceMetadata().getSequenceName().equals(sequenceToCheckpoint.getSequenceName()), "Cannot checkpoint a sequence [%s] which is not the latest one, sequences %s", sequenceToCheckpoint, sequences);
                    requestPause();
                    final CheckPointDataSourceMetadataAction checkpointAction = new CheckPointDataSourceMetadataAction(task.getDataSource(), ioConfig.getTaskGroupId(), null, createDataSourceMetadata(new SeekableStreamStartSequenceNumbers<>(stream, sequenceToCheckpoint.getStartOffsets(), sequenceToCheckpoint.getExclusiveStartPartitions())));
                    if (!toolbox.getTaskActionClient().submit(checkpointAction)) {
                        throw new ISE("Checkpoint request with sequences [%s] failed, dying", currOffsets);
                    }
                }
            }
            ingestionState = IngestionState.COMPLETED;
        } catch (Exception e) {
            // (1) catch all exceptions while reading from kafka
            caughtExceptionInner = e;
            log.error(e, "Encountered exception in run() before persisting.");
            throw e;
        } finally {
            try {
                // persist pending data
                driver.persist(committerSupplier.get());
            } catch (Exception e) {
                if (caughtExceptionInner != null) {
                    caughtExceptionInner.addSuppressed(e);
                } else {
                    throw e;
                }
            }
        }
        synchronized (statusLock) {
            if (stopRequested.get() && !publishOnStop.get()) {
                throw new InterruptedException("Stopping without publishing");
            }
            status = Status.PUBLISHING;
        }
        // We need to copy sequences here, because the success callback in publishAndRegisterHandoff removes items from
        // the sequence list. If a publish finishes before we finish iterating through the sequence list, we can
        // end up skipping some sequences.
        List<SequenceMetadata<PartitionIdType, SequenceOffsetType>> sequencesSnapshot = new ArrayList<>(sequences);
        for (int i = 0; i < sequencesSnapshot.size(); i++) {
            final SequenceMetadata<PartitionIdType, SequenceOffsetType> sequenceMetadata = sequencesSnapshot.get(i);
            if (!publishingSequences.contains(sequenceMetadata.getSequenceName())) {
                final boolean isLast = i == (sequencesSnapshot.size() - 1);
                if (isLast) {
                    // Shorten endOffsets of the last sequence to match currOffsets.
                    sequenceMetadata.setEndOffsets(currOffsets);
                }
                // Update assignments of the sequence, which should clear them. (This will be checked later, when the
                // Committer is built.)
                sequenceMetadata.updateAssignments(currOffsets, this::isMoreToReadAfterReadingRecord);
                publishingSequences.add(sequenceMetadata.getSequenceName());
                // persist already done in finally, so directly add to publishQueue
                publishAndRegisterHandoff(sequenceMetadata);
            }
        }
        if (backgroundThreadException != null) {
            throw new RuntimeException(backgroundThreadException);
        }
        // Wait for publish futures to complete.
        Futures.allAsList(publishWaitList).get();
        // Wait for handoff futures to complete.
        // Note that every publishing task (created by calling AppenderatorDriver.publish()) has a corresponding
        // handoffFuture. handoffFuture can throw an exception if 1) the corresponding publishFuture failed or 2) it
        // failed to persist sequences. It might also return null if handoff failed, but was recoverable.
        // See publishAndRegisterHandoff() for details.
        List<SegmentsAndCommitMetadata> handedOffList = Collections.emptyList();
        if (tuningConfig.getHandoffConditionTimeout() == 0) {
            handedOffList = Futures.allAsList(handOffWaitList).get();
        } else {
            final long start = System.nanoTime();
            try {
                handedOffList = Futures.allAsList(handOffWaitList).get(tuningConfig.getHandoffConditionTimeout(), TimeUnit.MILLISECONDS);
            } catch (TimeoutException e) {
                // Handoff timeout is not an indexing failure, but coordination failure. We simply ignore timeout exception
                // here.
                log.makeAlert("Timeout waiting for handoff").addData("taskId", task.getId()).addData("handoffConditionTimeout", tuningConfig.getHandoffConditionTimeout()).emit();
            } finally {
                handoffWaitMs = TimeUnit.NANOSECONDS.toMillis(System.nanoTime() - start);
            }
        }
        for (SegmentsAndCommitMetadata handedOff : handedOffList) {
            log.info("Handoff complete for segments: %s", String.join(", ", Lists.transform(handedOff.getSegments(), DataSegment::toString)));
        }
        appenderator.close();
    } catch (InterruptedException | RejectedExecutionException e) {
        // (2) catch InterruptedException and RejectedExecutionException thrown for the whole ingestion steps including
        // the final publishing.
        caughtExceptionOuter = e;
        try {
            Futures.allAsList(publishWaitList).cancel(true);
            Futures.allAsList(handOffWaitList).cancel(true);
            if (appenderator != null) {
                appenderator.closeNow();
            }
        } catch (Exception e2) {
            e.addSuppressed(e2);
        }
        // handle the InterruptedException that gets wrapped in a RejectedExecutionException
        if (e instanceof RejectedExecutionException && (e.getCause() == null || !(e.getCause() instanceof InterruptedException))) {
            throw e;
        }
        // if we were interrupted because we were asked to stop, handle the exception and return success, else rethrow
        if (!stopRequested.get()) {
            Thread.currentThread().interrupt();
            throw e;
        }
    } catch (Exception e) {
        // (3) catch all other exceptions thrown for the whole ingestion steps including the final publishing.
        caughtExceptionOuter = e;
        try {
            Futures.allAsList(publishWaitList).cancel(true);
            Futures.allAsList(handOffWaitList).cancel(true);
            if (appenderator != null) {
                appenderator.closeNow();
            }
        } catch (Exception e2) {
            e.addSuppressed(e2);
        }
        throw e;
    } finally {
        try {
            if (driver != null) {
                driver.close();
            }
            toolbox.getChatHandlerProvider().unregister(task.getId());
            if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
                toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
                toolbox.getDataSegmentServerAnnouncer().unannounce();
            }
        } catch (Throwable e) {
            if (caughtExceptionOuter != null) {
                caughtExceptionOuter.addSuppressed(e);
            } else {
                throw e;
            }
        }
    }
    toolbox.getTaskReportFileWriter().write(task.getId(), getTaskCompletionReports(null, handoffWaitMs));
    return TaskStatus.success(task.getId());
}
Also used : TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) LookupNodeService(org.apache.druid.discovery.LookupNodeService) Produces(javax.ws.rs.Produces) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) IngestionState(org.apache.druid.indexer.IngestionState) MediaType(javax.ws.rs.core.MediaType) Future(java.util.concurrent.Future) Map(java.util.Map) InputFormat(org.apache.druid.data.input.InputFormat) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) NotNull(javax.validation.constraints.NotNull) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) IndexTaskUtils(org.apache.druid.indexing.common.task.IndexTaskUtils) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) GET(javax.ws.rs.GET) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) InputRowSchema(org.apache.druid.data.input.InputRowSchema) TaskStatus(org.apache.druid.indexer.TaskStatus) ArrayList(java.util.ArrayList) HttpServletRequest(javax.servlet.http.HttpServletRequest) Lists(com.google.common.collect.Lists) CheckPointDataSourceMetadataAction(org.apache.druid.indexing.common.actions.CheckPointDataSourceMetadataAction) Nullable(javax.annotation.Nullable) Throwables(com.google.common.base.Throwables) IOException(java.io.IOException) FutureCallback(com.google.common.util.concurrent.FutureCallback) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) Futures(com.google.common.util.concurrent.Futures) Lock(java.util.concurrent.locks.Lock) TreeMap(java.util.TreeMap) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) TimeoutException(java.util.concurrent.TimeoutException) ResetDataSourceMetadataAction(org.apache.druid.indexing.common.actions.ResetDataSourceMetadataAction) SettableFuture(com.google.common.util.concurrent.SettableFuture) SeekableStreamSupervisor(org.apache.druid.indexing.seekablestream.supervisor.SeekableStreamSupervisor) ByteBuffer(java.nio.ByteBuffer) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) QueryParam(javax.ws.rs.QueryParam) Consumes(javax.ws.rs.Consumes) TaskLock(org.apache.druid.indexing.common.TaskLock) DefaultValue(javax.ws.rs.DefaultValue) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) TypeReference(com.fasterxml.jackson.core.type.TypeReference) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) ImmutableSet(com.google.common.collect.ImmutableSet) Context(javax.ws.rs.core.Context) ImmutableMap(com.google.common.collect.ImmutableMap) InputRowSchemas(org.apache.druid.indexing.input.InputRowSchemas) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) LockGranularity(org.apache.druid.indexing.common.LockGranularity) OrderedSequenceNumber(org.apache.druid.indexing.seekablestream.common.OrderedSequenceNumber) List(java.util.List) Response(javax.ws.rs.core.Response) TaskLockType(org.apache.druid.indexing.common.TaskLockType) DataSegment(org.apache.druid.timeline.DataSegment) ByteEntity(org.apache.druid.data.input.impl.ByteEntity) NodeRole(org.apache.druid.discovery.NodeRole) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) RecordSupplier(org.apache.druid.indexing.seekablestream.common.RecordSupplier) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) CollectionUtils(org.apache.druid.utils.CollectionUtils) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ConcurrentMap(java.util.concurrent.ConcurrentMap) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) HashSet(java.util.HashSet) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) Access(org.apache.druid.server.security.Access) POST(javax.ws.rs.POST) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) Iterator(java.util.Iterator) ReentrantLock(java.util.concurrent.locks.ReentrantLock) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) TimeUnit(java.util.concurrent.TimeUnit) Condition(java.util.concurrent.locks.Condition) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) StreamPartition(org.apache.druid.indexing.seekablestream.common.StreamPartition) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) OrderedPartitionableRecord(org.apache.druid.indexing.seekablestream.common.OrderedPartitionableRecord) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) ArrayList(java.util.ArrayList) CheckPointDataSourceMetadataAction(org.apache.druid.indexing.common.actions.CheckPointDataSourceMetadataAction) TaskLock(org.apache.druid.indexing.common.TaskLock) TimeoutException(java.util.concurrent.TimeoutException) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) InputRow(org.apache.druid.data.input.InputRow) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) Map(java.util.Map) TreeMap(java.util.TreeMap) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ConcurrentMap(java.util.concurrent.ConcurrentMap) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) DataSegment(org.apache.druid.timeline.DataSegment) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) ISE(org.apache.druid.java.util.common.ISE) LookupNodeService(org.apache.druid.discovery.LookupNodeService) IOException(java.io.IOException) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) Committer(org.apache.druid.data.input.Committer)

Aggregations

HashMap (java.util.HashMap)4 Map (java.util.Map)4 IngestionState (org.apache.druid.indexer.IngestionState)4 Preconditions (com.google.common.base.Preconditions)3 Throwables (com.google.common.base.Throwables)3 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)3 IOException (java.io.IOException)3 Collections (java.util.Collections)3 List (java.util.List)3 ExecutionException (java.util.concurrent.ExecutionException)3 TimeUnit (java.util.concurrent.TimeUnit)3 TimeoutException (java.util.concurrent.TimeoutException)3 HttpServletRequest (javax.servlet.http.HttpServletRequest)3 GET (javax.ws.rs.GET)3 Path (javax.ws.rs.Path)3 Produces (javax.ws.rs.Produces)3 Context (javax.ws.rs.core.Context)3 MediaType (javax.ws.rs.core.MediaType)3 Response (javax.ws.rs.core.Response)3 InputRow (org.apache.druid.data.input.InputRow)3