
Example 1 with FireDepartment

Use of org.apache.druid.segment.realtime.FireDepartment in project druid by druid-io.

From the class RealtimeIndexTask, method run:

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    runThread = Thread.currentThread();
    if (this.plumber != null) {
        throw new IllegalStateException("Plumber must be null");
    }
    setupTimeoutAlert();
    boolean normalExit = true;
    // It would be nice to get the PlumberSchool in the constructor.  Although that will need jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been introduced.
    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);
    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.
    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
    // Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, http timeout error can occur while waiting for a
    // lock to be acquired.
    final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
            if (lock.isRevoked()) {
                throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegment(segment);
        }

        @Override
        public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
                final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
                if (lock.isRevoked()) {
                    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
                }
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
                for (DataSegment segment : segments) {
                    toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                }
            }
        }
    };
    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)
    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy = new VersioningPolicy() {

        @Override
        public String getVersion(final Interval interval) {
            try {
                // Side effect: Calling getVersion causes a lock to be acquired
                final TimeChunkLockAcquireAction action = new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs);
                final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(action), "Cannot acquire a lock for interval[%s]", interval);
                if (lock.isRevoked()) {
                    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
                }
                return lock.getVersion();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
    RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir()).withVersioningPolicy(versioningPolicy);
    final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
    this.metrics = fireDepartment.getMetrics();
    final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
    // NOTE: (partitionNum_index.zip for HDFS data storage) to mismatch, or it can cause historical nodes to load
    // NOTE: different instances of the "same" segment.
    final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryProcessingPool(), toolbox.getJoinableFactory(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getCachePopulatorStats(), toolbox.getJsonMapper());
    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null ? toolbox.getLookupNodeService() : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
    DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
    try {
        toolbox.getDataSegmentServerAnnouncer().announce();
        toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        plumber.startJob();
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        // Time to read data!
        while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
            Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
        }
    } catch (Throwable e) {
        normalExit = false;
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        throw e;
    } finally {
        if (normalExit) {
            try {
                // Persist if we had actually started.
                if (firehose != null) {
                    log.info("Persisting remaining data.");
                    final Committer committer = committerSupplier.get();
                    final CountDownLatch persistLatch = new CountDownLatch(1);
                    plumber.persist(new Committer() {

                        @Override
                        public Object getMetadata() {
                            return committer.getMetadata();
                        }

                        @Override
                        public void run() {
                            try {
                                committer.run();
                            } finally {
                                persistLatch.countDown();
                            }
                        }
                    });
                    persistLatch.await();
                }
                if (gracefullyStopped) {
                    log.info("Gracefully stopping.");
                } else {
                    log.info("Finishing the job.");
                    synchronized (this) {
                        if (gracefullyStopped) {
                            // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                            log.info("Gracefully stopping.");
                        } else {
                            finishingJob = true;
                        }
                    }
                    if (finishingJob) {
                        plumber.finishJob();
                    }
                }
            } catch (InterruptedException e) {
                log.debug(e, "Interrupted while finishing the job");
            } catch (Exception e) {
                log.makeAlert(e, "Failed to finish realtime task").emit();
                throw e;
            } finally {
                if (firehose != null) {
                    CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
                }
                toolbox.removeMonitor(metricsMonitor);
            }
        }
        toolbox.getDataSegmentServerAnnouncer().unannounce();
        toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
    }
    log.info("Job done!");
    return TaskStatus.success(getId());
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) DataSegment(org.apache.druid.timeline.DataSegment) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) SegmentPublisher(org.apache.druid.segment.realtime.SegmentPublisher) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) PlumberSchool(org.apache.druid.segment.realtime.plumber.PlumberSchool) RealtimePlumberSchool(org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool) LookupNodeService(org.apache.druid.discovery.LookupNodeService) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) LockReleaseAction(org.apache.druid.indexing.common.actions.LockReleaseAction) IOException(java.io.IOException) DataSchema(org.apache.druid.segment.indexing.DataSchema) VersioningPolicy(org.apache.druid.segment.realtime.plumber.VersioningPolicy) RealtimePlumberSchool(org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) RealtimeMetricsMonitor(org.apache.druid.segment.realtime.RealtimeMetricsMonitor) Committer(org.apache.druid.data.input.Committer) Interval(org.joda.time.Interval)
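
The structural core of this example is the decorator around the toolbox's segment announcer: every announce first acquires an exclusive time-chunk lock through the task action client, and every unannounce releases the lock in a finally block even when the unannounce itself fails. A minimal standalone sketch of that wrap-and-release pattern, using hypothetical Announcer and LockClient interfaces as stand-ins for Druid's DataSegmentAnnouncer and TaskActionClient (the names and signatures below are illustrative, not Druid's API):

import java.io.IOException;

// Hypothetical stand-ins for the real announcer and lock-acquiring action client.
interface Announcer {
    void announce(String interval) throws IOException;
    void unannounce(String interval) throws IOException;
}

interface LockClient {
    // Returns false if the lock could not be acquired or was revoked.
    boolean acquire(String interval) throws IOException;
    void release(String interval) throws IOException;
}

// Decorator mirroring lockingSegmentAnnouncer above: take the exclusive lock before delegating
// the announce, and always release it after the unannounce.
class LockingAnnouncer implements Announcer {
    private final Announcer delegate;
    private final LockClient locks;

    LockingAnnouncer(Announcer delegate, LockClient locks) {
        this.delegate = delegate;
        this.locks = locks;
    }

    @Override
    public void announce(String interval) throws IOException {
        if (!locks.acquire(interval)) {
            throw new IllegalStateException("Cannot acquire a lock for interval " + interval);
        }
        delegate.announce(interval);
    }

    @Override
    public void unannounce(String interval) throws IOException {
        try {
            delegate.unannounce(interval);
        } finally {
            // Release the lock even if the unannounce threw.
            locks.release(interval);
        }
    }
}

Wrapping rather than subclassing keeps the lock bookkeeping in one place while the underlying announcer and its wiring stay untouched, which is the same reason the anonymous DataSegmentAnnouncer above is built inline around toolbox.getSegmentAnnouncer().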

Example 2 with FireDepartment

Use of org.apache.druid.segment.realtime.FireDepartment in project druid by druid-io.

From the class PartialSegmentGenerateTask, method generateSegments:

private List<DataSegment> generateSegments(final TaskToolbox toolbox, final ParallelIndexSupervisorTaskClient taskClient, final InputSource inputSource, final File tmpDir) throws IOException, InterruptedException, ExecutionException, TimeoutException {
    final DataSchema dataSchema = ingestionSchema.getDataSchema();
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    final RowIngestionMeters buildSegmentsMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    toolbox.addMonitor(new RealtimeMetricsMonitor(Collections.singletonList(fireDepartmentForMetrics), Collections.singletonMap(DruidMetrics.TASK_ID, new String[] { getId() })));
    final ParallelIndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
    final PartitionsSpec partitionsSpec = tuningConfig.getGivenOrDefaultPartitionsSpec();
    final long pushTimeout = tuningConfig.getPushTimeout();
    final SegmentAllocatorForBatch segmentAllocator = createSegmentAllocator(toolbox, taskClient);
    final SequenceNameFunction sequenceNameFunction = segmentAllocator.getSequenceNameFunction();
    final ParseExceptionHandler parseExceptionHandler = new ParseExceptionHandler(buildSegmentsMeters, tuningConfig.isLogParseExceptions(), tuningConfig.getMaxParseExceptions(), tuningConfig.getMaxSavedParseExceptions());
    final boolean useMaxMemoryEstimates = getContextValue(Tasks.USE_MAX_MEMORY_ESTIMATES, Tasks.DEFAULT_USE_MAX_MEMORY_ESTIMATES);
    final Appenderator appenderator = BatchAppenderators.newAppenderator(getId(), toolbox.getAppenderatorsManager(), fireDepartmentMetrics, toolbox, dataSchema, tuningConfig, new ShuffleDataSegmentPusher(supervisorTaskId, getId(), toolbox.getIntermediaryDataManager()), buildSegmentsMeters, parseExceptionHandler, useMaxMemoryEstimates);
    boolean exceptionOccurred = false;
    try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) {
        driver.startJob();
        final SegmentsAndCommitMetadata pushed = InputSourceProcessor.process(dataSchema, driver, partitionsSpec, inputSource, inputSource.needsFormat() ? ParallelIndexSupervisorTask.getInputFormat(ingestionSchema) : null, tmpDir, sequenceNameFunction, inputRowIteratorBuilder, buildSegmentsMeters, parseExceptionHandler, pushTimeout);
        return pushed.getSegments();
    } catch (Exception e) {
        exceptionOccurred = true;
        throw e;
    } finally {
        if (exceptionOccurred) {
            appenderator.closeNow();
        } else {
            appenderator.close();
        }
    }
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) ShuffleDataSegmentPusher(org.apache.druid.indexing.worker.shuffle.ShuffleDataSegmentPusher) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) BatchAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver) TimeoutException(java.util.concurrent.TimeoutException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException) DataSchema(org.apache.druid.segment.indexing.DataSchema) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) SegmentAllocatorForBatch(org.apache.druid.indexing.common.task.SegmentAllocatorForBatch) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) RealtimeMetricsMonitor(org.apache.druid.segment.realtime.RealtimeMetricsMonitor) SequenceNameFunction(org.apache.druid.indexing.common.task.SequenceNameFunction) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters)
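
Both this snippet and the IndexTask example further down use the same cleanup idiom around the Appenderator: a boolean records whether the try block exited exceptionally, and the finally block calls closeNow() to abandon in-flight work on failure but close() to finish it gracefully otherwise. A small sketch of the idiom in isolation, with a hypothetical Sink resource standing in for the Appenderator (illustrative only, not a Druid interface):

// Hypothetical resource with both a graceful close and an abortive close.
interface Sink extends AutoCloseable {
    void write(String row) throws Exception;

    // Graceful: flush and persist whatever is still buffered.
    @Override
    void close() throws Exception;

    // Abortive: drop in-flight work immediately, without persisting.
    void closeNow();
}

class SinkRunner {
    static void run(Sink sink, Iterable<String> rows) throws Exception {
        boolean exceptionOccurred = false;
        try {
            for (String row : rows) {
                sink.write(row);
            }
        } catch (Exception e) {
            exceptionOccurred = true;
            throw e;
        } finally {
            if (exceptionOccurred) {
                sink.closeNow(); // don't spend time persisting work we know is broken
            } else {
                sink.close();
            }
        }
    }
}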

Example 3 with FireDepartment

Use of org.apache.druid.segment.realtime.FireDepartment in project druid by druid-io.

From the class TaskSerdeTest, method testRealtimeIndexTaskSerde:

@Test
public void testRealtimeIndexTaskSerde() throws Exception {
    final RealtimeIndexTask task = new RealtimeIndexTask(null, new TaskResource("rofl", 2), new FireDepartment(new DataSchema("foo", null, new AggregatorFactory[0], new UniformGranularitySpec(Granularities.HOUR, Granularities.NONE, null), null, jsonMapper), new RealtimeIOConfig(new LocalFirehoseFactory(new File("lol"), "rofl", null), (schema, config, metrics) -> null), new RealtimeTuningConfig(null, 1, 10L, null, new Period("PT10M"), null, null, null, null, 1, NoneShardSpec.instance(), indexSpec, null, 0, 0, true, null, null, null, null)), null);
    final String json = jsonMapper.writeValueAsString(task);
    // Just want to run the clock a bit to make sure the task id doesn't change
    Thread.sleep(100);
    final RealtimeIndexTask task2 = (RealtimeIndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
    Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(new Period("PT10M"), task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
    Assert.assertEquals(Granularities.HOUR, task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
    Assert.assertTrue(task.getRealtimeIngestionSchema().getTuningConfig().isReportParseExceptions());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod(), task2.getRealtimeIngestionSchema().getTuningConfig().getWindowPeriod());
    Assert.assertEquals(task.getRealtimeIngestionSchema().getTuningConfig().getMaxBytesInMemory(), task2.getRealtimeIngestionSchema().getTuningConfig().getMaxBytesInMemory());
    Assert.assertEquals(task.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity(), task2.getRealtimeIngestionSchema().getDataSchema().getGranularitySpec().getSegmentGranularity());
}
Also used : DataSchema(org.apache.druid.segment.indexing.DataSchema) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Period(org.joda.time.Period) LocalFirehoseFactory(org.apache.druid.segment.realtime.firehose.LocalFirehoseFactory) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) File(java.io.File) Test(org.junit.Test)
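
The test itself is a standard Jackson round trip: serialize the task, deserialize against the polymorphic Task base type, and compare the fields that must survive serde. A generic sketch of the same round-trip step, assuming the value's concrete type is registered with the ObjectMapper (for Druid tasks that normally means the injectables-aware mapper used above, which this sketch does not set up):

import com.fasterxml.jackson.databind.ObjectMapper;

class SerdeCheck {
    // Serialize the value and read it back as the declared base type, so callers can assert
    // field-by-field equality between the original and the deserialized copy.
    static <T> T roundTrip(ObjectMapper mapper, Object value, Class<T> baseType) throws Exception {
        String json = mapper.writeValueAsString(value);
        return mapper.readValue(json, baseType);
    }
}

In the test above, roundTrip(jsonMapper, task, Task.class) would play the role of the explicit writeValueAsString/readValue pair before the assertions.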

Example 4 with FireDepartment

Use of org.apache.druid.segment.realtime.FireDepartment in project druid by druid-io.

From the class TaskLifecycleTest, method newRealtimeIndexTask:

private RealtimeIndexTask newRealtimeIndexTask() {
    String taskId = StringUtils.format("rt_task_%s", System.currentTimeMillis());
    DataSchema dataSchema = new DataSchema("test_ds", TestHelper.makeJsonMapper().convertValue(new MapInputRowParser(new TimeAndDimsParseSpec(new TimestampSpec(null, null, null), DimensionsSpec.EMPTY)), JacksonUtils.TYPE_REFERENCE_MAP_STRING_OBJECT), new AggregatorFactory[] { new LongSumAggregatorFactory("count", "rows") }, new UniformGranularitySpec(Granularities.DAY, Granularities.NONE, null), null, mapper);
    RealtimeIOConfig realtimeIOConfig = new RealtimeIOConfig(new MockFirehoseFactory(), null);
    RealtimeTuningConfig realtimeTuningConfig = new RealtimeTuningConfig(null, 1000, null, null, new Period("P1Y"),
        null, // default window period of 10 minutes
        null, // base persist dir ignored by Realtime Index task
        null, null, null, null, null, null, 0, 0, null, null, null, null, null);
    FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, realtimeTuningConfig);
    return new RealtimeIndexTask(taskId, new TaskResource(taskId, 1), fireDepartment, null);
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) RealtimeIndexTask(org.apache.druid.indexing.common.task.RealtimeIndexTask) TaskResource(org.apache.druid.indexing.common.task.TaskResource) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) Period(org.joda.time.Period) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec)
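
A hypothetical usage of this helper, assuming it sits inside TaskLifecycleTest next to the method above with JUnit's @Test and Assert already imported (the assertion values are taken from the DataSchema name and TaskResource capacity used in the helper):

@Test
public void testNewRealtimeIndexTaskDefaults() {
    RealtimeIndexTask task = newRealtimeIndexTask();
    Assert.assertEquals("test_ds", task.getDataSource());
    Assert.assertEquals(1, task.getTaskResource().getRequiredCapacity());
}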

Example 5 with FireDepartment

Use of org.apache.druid.segment.realtime.FireDepartment in project druid by druid-io.

From the class IndexTask, method generateAndPublishSegments:

/**
 * This method reads the input data row by row and adds each row to the appropriate segment using {@link BaseAppenderatorDriver}.
 * If there is no segment for the row, a new one is created. Segments can be published in the middle of reading inputs
 * if {@link DynamicPartitionsSpec} is used and one of the conditions below is satisfied.
 *
 * <ul>
 * <li>
 * If the number of rows in a segment exceeds {@link DynamicPartitionsSpec#maxRowsPerSegment}
 * </li>
 * <li>
 * If the number of rows added to {@link BaseAppenderatorDriver} so far exceeds {@link DynamicPartitionsSpec#maxTotalRows}
 * </li>
 * </ul>
 * <p>
 * At the end of this method, all the remaining segments are published.
 *
 * @return the last {@link TaskStatus}
 */
private TaskStatus generateAndPublishSegments(final TaskToolbox toolbox, final DataSchema dataSchema, final InputSource inputSource, final File tmpDir, final PartitionAnalysis partitionAnalysis) throws IOException, InterruptedException {
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    FireDepartmentMetrics buildSegmentsFireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    if (toolbox.getMonitorScheduler() != null) {
        final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, buildSegmentsMeters);
        toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
    }
    final PartitionsSpec partitionsSpec = partitionAnalysis.getPartitionsSpec();
    final IndexTuningConfig tuningConfig = ingestionSchema.getTuningConfig();
    final long pushTimeout = tuningConfig.getPushTimeout();
    final SegmentAllocatorForBatch segmentAllocator;
    final SequenceNameFunction sequenceNameFunction;
    switch(partitionsSpec.getType()) {
        case HASH:
        case RANGE:
            final SegmentAllocatorForBatch localSegmentAllocator = SegmentAllocators.forNonLinearPartitioning(toolbox, getDataSource(), baseSequenceName, dataSchema.getGranularitySpec(), null, (CompletePartitionAnalysis) partitionAnalysis);
            sequenceNameFunction = localSegmentAllocator.getSequenceNameFunction();
            segmentAllocator = localSegmentAllocator;
            break;
        case LINEAR:
            segmentAllocator = SegmentAllocators.forLinearPartitioning(toolbox, baseSequenceName, null, dataSchema, getTaskLockHelper(), ingestionSchema.getIOConfig().isAppendToExisting(), partitionAnalysis.getPartitionsSpec(), null);
            sequenceNameFunction = segmentAllocator.getSequenceNameFunction();
            break;
        default:
            throw new UOE("[%s] secondary partition type is not supported", partitionsSpec.getType());
    }
    Set<DataSegment> segmentsFoundForDrop = null;
    if (ingestionSchema.getIOConfig().isDropExisting()) {
        segmentsFoundForDrop = getUsedSegmentsWithinInterval(toolbox, getDataSource(), ingestionSchema.getDataSchema().getGranularitySpec().inputIntervals());
    }
    final TransactionalSegmentPublisher publisher = (segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish, commitMetadata) -> toolbox.getTaskActionClient().submit(SegmentTransactionalInsertAction.overwriteAction(segmentsToBeOverwritten, segmentsToDrop, segmentsToPublish));
    String effectiveId = getContextValue(CompactionTask.CTX_KEY_APPENDERATOR_TRACKING_TASK_ID, null);
    if (effectiveId == null) {
        effectiveId = getId();
    }
    final Appenderator appenderator = BatchAppenderators.newAppenderator(effectiveId, toolbox.getAppenderatorsManager(), buildSegmentsFireDepartmentMetrics, toolbox, dataSchema, tuningConfig, buildSegmentsMeters, buildSegmentsParseExceptionHandler, isUseMaxMemoryEstimates());
    boolean exceptionOccurred = false;
    try (final BatchAppenderatorDriver driver = BatchAppenderators.newDriver(appenderator, toolbox, segmentAllocator)) {
        driver.startJob();
        InputSourceProcessor.process(dataSchema, driver, partitionsSpec, inputSource, inputSource.needsFormat() ? getInputFormat(ingestionSchema) : null, tmpDir, sequenceNameFunction, new DefaultIndexTaskInputRowIteratorBuilder(), buildSegmentsMeters, buildSegmentsParseExceptionHandler, pushTimeout);
        // If we use timeChunk lock, then we don't have to specify what segments will be overwritten because
        // it will just overwrite all segments overlapped with the new segments.
        final Set<DataSegment> inputSegments = getTaskLockHelper().isUseSegmentLock() ? getTaskLockHelper().getLockedExistingSegments() : null;
        final boolean storeCompactionState = getContextValue(Tasks.STORE_COMPACTION_STATE_KEY, Tasks.DEFAULT_STORE_COMPACTION_STATE);
        final Function<Set<DataSegment>, Set<DataSegment>> annotateFunction = compactionStateAnnotateFunction(storeCompactionState, toolbox, ingestionSchema);
        // Probably we can publish atomicUpdateGroup along with segments.
        final SegmentsAndCommitMetadata published = awaitPublish(driver.publishAll(inputSegments, segmentsFoundForDrop, publisher, annotateFunction), pushTimeout);
        appenderator.close();
        // for awaitSegmentAvailabilityTimeoutMillis
        if (tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis() > 0 && published != null) {
            ingestionState = IngestionState.SEGMENT_AVAILABILITY_WAIT;
            ArrayList<DataSegment> segmentsToWaitFor = new ArrayList<>(published.getSegments());
            waitForSegmentAvailability(toolbox, segmentsToWaitFor, tuningConfig.getAwaitSegmentAvailabilityTimeoutMillis());
        }
        ingestionState = IngestionState.COMPLETED;
        if (published == null) {
            log.error("Failed to publish segments, aborting!");
            errorMsg = "Failed to publish segments.";
            toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
            return TaskStatus.failure(getId(), errorMsg);
        } else {
            log.info("Processed[%,d] events, unparseable[%,d], thrownAway[%,d].", buildSegmentsMeters.getProcessed(), buildSegmentsMeters.getUnparseable(), buildSegmentsMeters.getThrownAway());
            log.info("Published [%s] segments", published.getSegments().size());
            log.debugSegments(published.getSegments(), "Published segments");
            toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
            return TaskStatus.success(getId());
        }
    } catch (TimeoutException | ExecutionException e) {
        exceptionOccurred = true;
        throw new RuntimeException(e);
    } catch (Exception e) {
        exceptionOccurred = true;
        throw e;
    } finally {
        if (exceptionOccurred) {
            appenderator.closeNow();
        } else {
            appenderator.close();
        }
    }
}
Also used : DefaultIndexTaskInputRowIteratorBuilder(org.apache.druid.indexing.common.task.batch.parallel.iterator.DefaultIndexTaskInputRowIteratorBuilder) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Set(java.util.Set) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ExecutionException(java.util.concurrent.ExecutionException) TimeoutException(java.util.concurrent.TimeoutException) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) BatchAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.BatchAppenderatorDriver) UOE(org.apache.druid.java.util.common.UOE) IOException(java.io.IOException) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec)
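
The Javadoc above names the two conditions under which segments are published while input is still being read when DynamicPartitionsSpec is in play. Expressed as a tiny predicate, purely as an illustration of those documented conditions rather than Druid's actual implementation:

class DynamicPublishCondition {
    // Publish early if either documented threshold is exceeded: rows in the current segment
    // beyond maxRowsPerSegment, or total rows handed to the driver beyond maxTotalRows.
    static boolean shouldPublish(long rowsInSegment, long totalRowsAdded, long maxRowsPerSegment, long maxTotalRows) {
        return rowsInSegment > maxRowsPerSegment || totalRowsAdded > maxTotalRows;
    }
}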

Aggregations

RealtimeIOConfig (org.apache.druid.segment.indexing.RealtimeIOConfig): 13 usages
FireDepartment (org.apache.druid.segment.realtime.FireDepartment): 13 usages
DataSchema (org.apache.druid.segment.indexing.DataSchema): 10 usages
IOException (java.io.IOException): 6 usages
RealtimeIndexTask (org.apache.druid.indexing.common.task.RealtimeIndexTask): 6 usages
File (java.io.File): 5 usages
ExecutionException (java.util.concurrent.ExecutionException): 5 usages
TimeoutException (java.util.concurrent.TimeoutException): 5 usages
ISE (org.apache.druid.java.util.common.ISE): 5 usages
FireDepartmentMetrics (org.apache.druid.segment.realtime.FireDepartmentMetrics): 5 usages
Appenderator (org.apache.druid.segment.realtime.appenderator.Appenderator): 5 usages
SegmentsAndCommitMetadata (org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata): 5 usages
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4 usages
FirehoseFactory (org.apache.druid.data.input.FirehoseFactory): 4 usages
InputRow (org.apache.druid.data.input.InputRow): 4 usages
TaskStatus (org.apache.druid.indexer.TaskStatus): 4 usages
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 4 usages
Test (org.junit.Test): 4 usages
Preconditions (com.google.common.base.Preconditions): 3 usages
Throwables (com.google.common.base.Throwables): 3 usages