Search in sources :

Example 1 with RealtimeAppenderatorTuningConfig

use of org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig in project druid by druid-io.

In the class AppenderatorDriverRealtimeIndexTaskTest, method makeRealtimeTask:

/**
 * Builds an {@link AppenderatorDriverRealtimeIndexTask} over the fixed "test_ds" schema used by this test class.
 *
 * <p>The schema has timestamp column "t" (format "auto"), string dimensions "dim1", "dim2" and "dim1t",
 * a long dimension "dimLong", a float dimension "dimFloat", and two aggregators: a row count named "rows"
 * and a long sum named "met1" over the field "met1". The returned task is an anonymous subclass whose
 * {@code isFirehoseDrainableByClosing} always answers {@code true}.
 *
 * <p>The remaining parameters are forwarded positionally into the {@link RealtimeAppenderatorTuningConfig}
 * constructor; every other tuning slot is passed as {@code null} except the literal {@code 1000} in the
 * second position (NOTE(review): presumably the in-memory row limit - confirm against the constructor).
 *
 * @param taskId                  id given to the created task
 * @param transformSpec           transform spec placed into the data schema
 * @param reportParseExceptions   forwarded to the tuning config
 * @param handoffTimeout          forwarded to the tuning config
 * @param logParseExceptions      forwarded to the tuning config
 * @param maxParseExceptions      forwarded to the tuning config
 * @param maxSavedParseExceptions forwarded to the tuning config
 * @param maxRowsPerSegment       forwarded to the tuning config
 * @param maxTotalRows            forwarded to the tuning config
 * @return the configured task
 */
private AppenderatorDriverRealtimeIndexTask makeRealtimeTask(final String taskId, final TransformSpec transformSpec, final boolean reportParseExceptions, final long handoffTimeout, final Boolean logParseExceptions, final Integer maxParseExceptions, final Integer maxSavedParseExceptions, final Integer maxRowsPerSegment, final Long maxTotalRows) {
    // Row parser: timestamp column plus the five test dimensions.
    final TimestampSpec timestampSpec = new TimestampSpec("t", "auto", null);
    final DimensionsSpec dimensionsSpec = new DimensionsSpec(
        ImmutableList.of(
            new StringDimensionSchema("dim1"),
            new StringDimensionSchema("dim2"),
            new StringDimensionSchema("dim1t"),
            new LongDimensionSchema("dimLong"),
            new FloatDimensionSchema("dimFloat")
        )
    );
    final MapInputRowParser rowParser = new MapInputRowParser(new TimeAndDimsParseSpec(timestampSpec, dimensionsSpec));

    final AggregatorFactory[] aggregators = {
        new CountAggregatorFactory("rows"),
        new LongSumAggregatorFactory("met1", "met1")
    };
    final UniformGranularitySpec granularitySpec = new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null);

    // The parser is handed to DataSchema in its map form, produced by the JSON mapper.
    final DataSchema schema = new DataSchema(
        "test_ds",
        TestHelper.makeJsonMapper().convertValue(rowParser, JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT),
        aggregators,
        granularitySpec,
        transformSpec,
        OBJECT_MAPPER
    );

    final RealtimeIOConfig ioConfig = new RealtimeIOConfig(new TestFirehoseFactory(), null);

    final RealtimeAppenderatorTuningConfig tuning = new RealtimeAppenderatorTuningConfig(
        null,
        1000,
        null,
        null,
        maxRowsPerSegment,
        maxTotalRows,
        null,
        null,
        null,
        null,
        null,
        null,
        reportParseExceptions,
        handoffTimeout,
        null,
        null,
        logParseExceptions,
        maxParseExceptions,
        maxSavedParseExceptions
    );

    final RealtimeAppenderatorIngestionSpec ingestionSpec = new RealtimeAppenderatorIngestionSpec(schema, ioConfig, tuning);

    return new AppenderatorDriverRealtimeIndexTask(taskId, null, ingestionSpec, null) {

        @Override
        protected boolean isFirehoseDrainableByClosing(FirehoseFactory firehoseFactory) {
            return true;
        }
    };
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) LongDimensionSchema(org.apache.druid.data.input.impl.LongDimensionSchema) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) FloatDimensionSchema(org.apache.druid.data.input.impl.FloatDimensionSchema) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec)

Example 2 with RealtimeAppenderatorTuningConfig

use of org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig in project druid by druid-io.

In the class AppenderatorDriverRealtimeIndexTask, method run:

/**
 * Runs the realtime ingestion task.
 *
 * <p>Steps, in order: create the appenderator and stream driver; register this task with the chat handler
 * provider (and announce the node when the appenderators manager asks for it); start the driver with a
 * lock-acquiring callback; connect the firehose unless the task was already gracefully stopped; add rows to
 * the driver while {@code firehose.hasMore()}, publishing whenever the add result says a push is required;
 * then either publish the remainder and wait for handoff, or persist what was read if the task was
 * gracefully stopped. The task completion report is written on both the success and the failure path.
 *
 * @param toolbox source of the collaborators used here (task action client, chat handler provider,
 *                announcers, monitors, persist and temp directories, report writer)
 * @return {@code TaskStatus.success(getId())} when the body completes without throwing, otherwise
 *         {@code TaskStatus.failure(getId(), errorMsg)} where {@code errorMsg} is the stack trace of the
 *         caught {@link Throwable}
 */
@Override
public TaskStatus run(final TaskToolbox toolbox) {
    runThread = Thread.currentThread();
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
    setupTimeoutAlert();
    DataSchema dataSchema = spec.getDataSchema();
    // Work on a copy of the tuning config that points at this toolbox's persist directory.
    RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
    // This FireDepartment is built with empty IO/tuning config; it is only used to obtain the metrics object.
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
    this.metrics = fireDepartmentForMetrics.getMetrics();
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
    appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
    // The driver's lock type follows the USE_SHARED_LOCK context flag (default false -> EXCLUSIVE).
    TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
    StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
    try {
        log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        // Callback passed to startJob: acquires a lock for the given segment id and reports whether it succeeded.
        driver.startJob(segmentId -> {
            try {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        int sequenceNumber = 0;
        String sequenceName = makeSequenceName(getId(), sequenceNumber);
        // Publisher that only appends: any request to overwrite or drop segments is rejected with an ISE.
        final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
            if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
            }
            if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
            }
            final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
            return toolbox.getTaskActionClient().submit(action);
        };
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Time to read data!
        // The gracefullyStopped check comes first, so firehose is not dereferenced when the connect above was skipped.
        while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
            try {
                InputRow inputRow = firehose.nextRow();
                if (inputRow == null) {
                    log.debug("Discarded null row, considering thrownAway.");
                    rowIngestionMeters.incrementThrownAway();
                } else {
                    AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
                    if (addResult.isOk()) {
                        final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                        if (isPushRequired) {
                            // Publish under the current sequence name, then move on to a fresh one.
                            publishSegments(driver, publisher, committerSupplier, sequenceName);
                            sequenceNumber++;
                            sequenceName = makeSequenceName(getId(), sequenceNumber);
                        }
                    } else {
                        // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                }
            } catch (ParseException e) {
                // Parse failures are delegated to handleParseException; the loop continues if it returns normally.
                handleParseException(e);
            }
        }
        ingestionState = IngestionState.COMPLETED;
        if (!gracefullyStopped) {
            // Re-check the flag under the lock before committing to the finishing path.
            synchronized (this) {
                if (gracefullyStopped) {
                    // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                    log.info("Gracefully stopping.");
                } else {
                    finishingJob = true;
                }
            }
            if (finishingJob) {
                log.info("Finishing job...");
                // Publish any remaining segments
                publishSegments(driver, publisher, committerSupplier, sequenceName);
                waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
            }
        } else if (firehose != null) {
            log.info("Task was gracefully stopped, will persist data before exiting");
            persistAndWait(driver, committerSupplier.get());
        }
    } catch (Throwable e) {
        // Any failure: emit an alert, record the stack trace, write the report, and return a failure status.
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
        // Pass the caught exception to the logger so a close failure is reported with its cause
        // instead of as a bare message.
        CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn(e, "Failed to close Firehose"));
        appenderator.close();
        CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn(e, "Failed to close AppenderatorDriver"));
        toolbox.removeMonitor(metricsMonitor);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }
    }
    log.info("Job done!");
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.success(getId());
}
Also used : StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LookupNodeService(org.apache.druid.discovery.LookupNodeService) Produces(javax.ws.rs.Produces) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) TimeoutException(java.util.concurrent.TimeoutException) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Timer(java.util.Timer) IngestionState(org.apache.druid.indexer.IngestionState) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) MediaType(javax.ws.rs.core.MediaType) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) QueryRunner(org.apache.druid.query.QueryRunner) TimerTask(java.util.TimerTask) DateTimes(org.apache.druid.java.util.common.DateTimes) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) Context(javax.ws.rs.core.Context) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec) ImmutableMap(com.google.common.collect.ImmutableMap) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) 
StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) CountDownLatch(java.util.concurrent.CountDownLatch) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) Response(javax.ws.rs.core.Response) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TaskLockType(org.apache.druid.indexing.common.TaskLockType) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) NodeRole(org.apache.druid.discovery.NodeRole) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Queue(java.util.Queue) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ActionBasedSegmentAllocator(org.apache.druid.indexing.appenderator.ActionBasedSegmentAllocator) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) GET(javax.ws.rs.GET) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) HashMap(java.util.HashMap) 
RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) HttpServletRequest(javax.servlet.http.HttpServletRequest) Query(org.apache.druid.query.Query) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) ActionBasedUsedSegmentChecker(org.apache.druid.indexing.appenderator.ActionBasedUsedSegmentChecker) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) Throwables(com.google.common.base.Throwables) Committers(org.apache.druid.segment.realtime.plumber.Committers) IOException(java.io.IOException) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) SegmentUtils(org.apache.druid.segment.SegmentUtils) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) VisibleForTesting(com.google.common.annotations.VisibleForTesting) StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) DataSchema(org.apache.druid.segment.indexing.DataSchema) CloseableUtils(org.apache.druid.utils.CloseableUtils) 
Collections(java.util.Collections) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) TaskLock(org.apache.druid.indexing.common.TaskLock) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) ISE(org.apache.druid.java.util.common.ISE) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) IOException(java.io.IOException) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) TaskLockType(org.apache.druid.indexing.common.TaskLockType) InputRow(org.apache.druid.data.input.InputRow) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) Committer(org.apache.druid.data.input.Committer) ParseException(org.apache.druid.java.util.common.parsers.ParseException)

Aggregations

FirehoseFactory (org.apache.druid.data.input.FirehoseFactory)2 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)1 JsonIgnore (com.fasterxml.jackson.annotation.JsonIgnore)1 JsonProperty (com.fasterxml.jackson.annotation.JsonProperty)1 VisibleForTesting (com.google.common.annotations.VisibleForTesting)1 Preconditions (com.google.common.base.Preconditions)1 Supplier (com.google.common.base.Supplier)1 Throwables (com.google.common.base.Throwables)1 ImmutableMap (com.google.common.collect.ImmutableMap)1 AsyncFunction (com.google.common.util.concurrent.AsyncFunction)1 Futures (com.google.common.util.concurrent.Futures)1 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)1 IOException (java.io.IOException)1 Collections (java.util.Collections)1 HashMap (java.util.HashMap)1 List (java.util.List)1 Map (java.util.Map)1 Queue (java.util.Queue)1 Timer (java.util.Timer)1 TimerTask (java.util.TimerTask)1