Search in sources :

Example 1 with DataSegmentAnnouncer

use of org.apache.druid.server.coordination.DataSegmentAnnouncer in project druid by druid-io.

the class RealtimeIndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    runThread = Thread.currentThread();
    if (this.plumber != null) {
        throw new IllegalStateException("Plumber must be null");
    }
    setupTimeoutAlert();
    boolean normalExit = true;
    // It would be nice to get the PlumberSchool in the constructor.  Although that will need jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been introduced.
    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);
    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.
    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
    // Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, http timeout error can occur while waiting for a
    // lock to be acquired.
    final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
            if (lock.isRevoked()) {
                throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegment(segment);
        }

        @Override
        public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
                final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
                if (lock.isRevoked()) {
                    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
                }
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
                for (DataSegment segment : segments) {
                    toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                }
            }
        }
    };
    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)
    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy = new VersioningPolicy() {

        @Override
        public String getVersion(final Interval interval) {
            try {
                // Side effect: Calling getVersion causes a lock to be acquired
                final TimeChunkLockAcquireAction action = new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs);
                final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(action), "Cannot acquire a lock for interval[%s]", interval);
                if (lock.isRevoked()) {
                    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
                }
                return lock.getVersion();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
    RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir()).withVersioningPolicy(versioningPolicy);
    final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
    this.metrics = fireDepartment.getMetrics();
    final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
    // NOTE: (partitionNum_index.zip for HDFS data storage) to mismatch, or it can cause historical nodes to load
    // NOTE: different instances of the "same" segment.
    final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryProcessingPool(), toolbox.getJoinableFactory(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getCachePopulatorStats(), toolbox.getJsonMapper());
    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null ? toolbox.getLookupNodeService() : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
    DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
    try {
        toolbox.getDataSegmentServerAnnouncer().announce();
        toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        plumber.startJob();
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        // Time to read data!
        while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
            Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
        }
    } catch (Throwable e) {
        normalExit = false;
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        throw e;
    } finally {
        if (normalExit) {
            try {
                // Persist if we had actually started.
                if (firehose != null) {
                    log.info("Persisting remaining data.");
                    final Committer committer = committerSupplier.get();
                    final CountDownLatch persistLatch = new CountDownLatch(1);
                    plumber.persist(new Committer() {

                        @Override
                        public Object getMetadata() {
                            return committer.getMetadata();
                        }

                        @Override
                        public void run() {
                            try {
                                committer.run();
                            } finally {
                                persistLatch.countDown();
                            }
                        }
                    });
                    persistLatch.await();
                }
                if (gracefullyStopped) {
                    log.info("Gracefully stopping.");
                } else {
                    log.info("Finishing the job.");
                    synchronized (this) {
                        if (gracefullyStopped) {
                            // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                            log.info("Gracefully stopping.");
                        } else {
                            finishingJob = true;
                        }
                    }
                    if (finishingJob) {
                        plumber.finishJob();
                    }
                }
            } catch (InterruptedException e) {
                log.debug(e, "Interrupted while finishing the job");
            } catch (Exception e) {
                log.makeAlert(e, "Failed to finish realtime task").emit();
                throw e;
            } finally {
                if (firehose != null) {
                    CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
                }
                toolbox.removeMonitor(metricsMonitor);
            }
        }
        toolbox.getDataSegmentServerAnnouncer().unannounce();
        toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
    }
    log.info("Job done!");
    return TaskStatus.success(getId());
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) DataSegment(org.apache.druid.timeline.DataSegment) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) SegmentPublisher(org.apache.druid.segment.realtime.SegmentPublisher) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) PlumberSchool(org.apache.druid.segment.realtime.plumber.PlumberSchool) RealtimePlumberSchool(org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool) LookupNodeService(org.apache.druid.discovery.LookupNodeService) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) LockReleaseAction(org.apache.druid.indexing.common.actions.LockReleaseAction) IOException(java.io.IOException) DataSchema(org.apache.druid.segment.indexing.DataSchema) VersioningPolicy(org.apache.druid.segment.realtime.plumber.VersioningPolicy) RealtimePlumberSchool(org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) RealtimeMetricsMonitor(org.apache.druid.segment.realtime.RealtimeMetricsMonitor) Committer(org.apache.druid.data.input.Committer) Interval(org.joda.time.Interval)

Example 2 with DataSegmentAnnouncer

use of org.apache.druid.server.coordination.DataSegmentAnnouncer in project druid by druid-io.

the class TaskLifecycleTest method setUpTaskToolboxFactory.

private TaskToolboxFactory setUpTaskToolboxFactory(DataSegmentPusher dataSegmentPusher, SegmentHandoffNotifierFactory handoffNotifierFactory, TestIndexerMetadataStorageCoordinator mdc, AppenderatorsManager appenderatorsManager) throws IOException {
    Preconditions.checkNotNull(queryRunnerFactoryConglomerate);
    Preconditions.checkNotNull(monitorScheduler);
    Preconditions.checkNotNull(taskStorage);
    Preconditions.checkNotNull(emitter);
    taskLockbox = new TaskLockbox(taskStorage, mdc);
    tac = new LocalTaskActionClientFactory(taskStorage, new TaskActionToolbox(taskLockbox, taskStorage, mdc, emitter, EasyMock.createMock(SupervisorManager.class)), new TaskAuditLogConfig(true));
    File tmpDir = temporaryFolder.newFolder();
    taskConfig = new TaskConfig(tmpDir.toString(), null, null, 50000, null, false, null, null, null, false, false, TaskConfig.BATCH_PROCESSING_MODE_DEFAULT.name());
    return new TaskToolboxFactory(taskConfig, new DruidNode("druid/middlemanager", "localhost", false, 8091, null, true, false), tac, emitter, dataSegmentPusher, new LocalDataSegmentKiller(new LocalDataSegmentPusherConfig()), new DataSegmentMover() {

        @Override
        public DataSegment move(DataSegment dataSegment, Map<String, Object> targetLoadSpec) {
            return dataSegment;
        }
    }, new DataSegmentArchiver() {

        @Override
        public DataSegment archive(DataSegment segment) {
            return segment;
        }

        @Override
        public DataSegment restore(DataSegment segment) {
            return segment;
        }
    }, new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(DataSegment segment) {
            announcedSinks++;
        }

        @Override
        public void unannounceSegment(DataSegment segment) {
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) {
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) {
        }
    }, // segment announcer
    EasyMock.createNiceMock(DataSegmentServerAnnouncer.class), handoffNotifierFactory, // query runner factory conglomerate corporation unionized collective
    () -> queryRunnerFactoryConglomerate, // query executor service
    DirectQueryProcessingPool.INSTANCE, NoopJoinableFactory.INSTANCE, // monitor scheduler
    () -> monitorScheduler, new SegmentCacheManagerFactory(new DefaultObjectMapper()), MAPPER, INDEX_IO, MapCache.create(0), FireDepartmentTest.NO_CACHE_CONFIG, new CachePopulatorStats(), INDEX_MERGER_V9, EasyMock.createNiceMock(DruidNodeAnnouncer.class), EasyMock.createNiceMock(DruidNode.class), new LookupNodeService("tier"), new DataNodeService("tier", 1000, ServerType.INDEXER_EXECUTOR, 0), new NoopTestTaskReportFileWriter(), null, AuthTestUtils.TEST_AUTHORIZER_MAPPER, new NoopChatHandlerProvider(), TEST_UTILS.getRowIngestionMetersFactory(), appenderatorsManager, new NoopIndexingServiceClient(), null, null, null);
}
Also used : DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) DataSegmentArchiver(org.apache.druid.segment.loading.DataSegmentArchiver) DataSegmentMover(org.apache.druid.segment.loading.DataSegmentMover) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) DefaultTaskConfig(org.apache.druid.indexing.overlord.config.DefaultTaskConfig) DruidNodeAnnouncer(org.apache.druid.discovery.DruidNodeAnnouncer) TaskAuditLogConfig(org.apache.druid.indexing.common.actions.TaskAuditLogConfig) DataSegment(org.apache.druid.timeline.DataSegment) NoopIndexingServiceClient(org.apache.druid.client.indexing.NoopIndexingServiceClient) TaskToolboxFactory(org.apache.druid.indexing.common.TaskToolboxFactory) CachePopulatorStats(org.apache.druid.client.cache.CachePopulatorStats) TaskActionToolbox(org.apache.druid.indexing.common.actions.TaskActionToolbox) LocalTaskActionClientFactory(org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory) NoopChatHandlerProvider(org.apache.druid.segment.realtime.firehose.NoopChatHandlerProvider) LocalDataSegmentPusherConfig(org.apache.druid.segment.loading.LocalDataSegmentPusherConfig) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) LookupNodeService(org.apache.druid.discovery.LookupNodeService) DataSegmentServerAnnouncer(org.apache.druid.server.coordination.DataSegmentServerAnnouncer) SupervisorManager(org.apache.druid.indexing.overlord.supervisor.SupervisorManager) LocalDataSegmentKiller(org.apache.druid.segment.loading.LocalDataSegmentKiller) DruidNode(org.apache.druid.server.DruidNode) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) DataNodeService(org.apache.druid.discovery.DataNodeService) File(java.io.File) NoopTestTaskReportFileWriter(org.apache.druid.indexing.common.task.NoopTestTaskReportFileWriter)

Aggregations

LookupNodeService (org.apache.druid.discovery.LookupNodeService)2 DataSegmentAnnouncer (org.apache.druid.server.coordination.DataSegmentAnnouncer)2 DataSegment (org.apache.druid.timeline.DataSegment)2 File (java.io.File)1 IOException (java.io.IOException)1 CountDownLatch (java.util.concurrent.CountDownLatch)1 CachePopulatorStats (org.apache.druid.client.cache.CachePopulatorStats)1 NoopIndexingServiceClient (org.apache.druid.client.indexing.NoopIndexingServiceClient)1 Committer (org.apache.druid.data.input.Committer)1 FirehoseFactory (org.apache.druid.data.input.FirehoseFactory)1 DataNodeService (org.apache.druid.discovery.DataNodeService)1 DiscoveryDruidNode (org.apache.druid.discovery.DiscoveryDruidNode)1 DruidNodeAnnouncer (org.apache.druid.discovery.DruidNodeAnnouncer)1 SegmentCacheManagerFactory (org.apache.druid.indexing.common.SegmentCacheManagerFactory)1 TaskLock (org.apache.druid.indexing.common.TaskLock)1 TaskToolboxFactory (org.apache.druid.indexing.common.TaskToolboxFactory)1 LocalTaskActionClientFactory (org.apache.druid.indexing.common.actions.LocalTaskActionClientFactory)1 LockReleaseAction (org.apache.druid.indexing.common.actions.LockReleaseAction)1 TaskActionToolbox (org.apache.druid.indexing.common.actions.TaskActionToolbox)1 TaskAuditLogConfig (org.apache.druid.indexing.common.actions.TaskAuditLogConfig)1