Search in sources :

Example 11 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class StreamAppenderatorTest method testMaxRowsInMemoryDisallowIncrementalPersists.

@Test
public void testMaxRowsInMemoryDisallowIncrementalPersists() throws Exception {
    try (final StreamAppenderatorTester tester = new StreamAppenderatorTester(3, false)) {
        final Appenderator appenderator = tester.getAppenderator();
        final AtomicInteger eventCount = new AtomicInteger(0);
        final Supplier<Committer> committerSupplier = () -> {
            final Object metadata = ImmutableMap.of("eventCount", eventCount.get());
            return new Committer() {

                @Override
                public Object getMetadata() {
                    return metadata;
                }

                @Override
                public void run() {
                // Do nothing
                }
            };
        };
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.startJob();
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "foo", 1), committerSupplier, false);
        Assert.assertEquals(1, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar", 1), committerSupplier, false);
        Assert.assertEquals(2, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "bar", 1), committerSupplier, false);
        Assert.assertEquals(2, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "baz", 1), committerSupplier, false);
        Assert.assertEquals(3, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(1), ir("2000", "qux", 1), committerSupplier, false);
        Assert.assertEquals(4, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.add(IDENTIFIERS.get(0), ir("2000", "bob", 1), committerSupplier, false);
        Assert.assertEquals(5, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.persistAll(committerSupplier.get());
        Assert.assertEquals(0, ((StreamAppenderator) appenderator).getRowsInMemory());
        appenderator.close();
    }
}
Also used : AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Committer(org.apache.druid.data.input.Committer) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 12 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class RealtimeIndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    runThread = Thread.currentThread();
    if (this.plumber != null) {
        throw new IllegalStateException("Plumber must be null");
    }
    setupTimeoutAlert();
    boolean normalExit = true;
    // It would be nice to get the PlumberSchool in the constructor.  Although that will need jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been introduced.
    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(toolbox);
    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.
    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final long lockTimeoutMs = getContextValue(Tasks.LOCK_TIMEOUT_KEY, Tasks.DEFAULT_LOCK_TIMEOUT_MILLIS);
    // Note: if lockTimeoutMs is larger than ServerConfig.maxIdleTime, http timeout error can occur while waiting for a
    // lock to be acquired.
    final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
            if (lock.isRevoked()) {
                throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegment(segment);
        }

        @Override
        public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
                final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segment.getInterval(), lockTimeoutMs)), "Cannot acquire a lock for interval[%s]", segment.getInterval());
                if (lock.isRevoked()) {
                    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segment.getInterval()));
                }
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
                for (DataSegment segment : segments) {
                    toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                }
            }
        }
    };
    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)
    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy = new VersioningPolicy() {

        @Override
        public String getVersion(final Interval interval) {
            try {
                // Side effect: Calling getVersion causes a lock to be acquired
                final TimeChunkLockAcquireAction action = new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, interval, lockTimeoutMs);
                final TaskLock lock = Preconditions.checkNotNull(toolbox.getTaskActionClient().submit(action), "Cannot acquire a lock for interval[%s]", interval);
                if (lock.isRevoked()) {
                    throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", interval));
                }
                return lock.getVersion();
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    };
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
    RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir()).withVersioningPolicy(versioningPolicy);
    final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
    this.metrics = fireDepartment.getMetrics();
    final RealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartment);
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
    // NOTE: (partitionNum_index.zip for HDFS data storage) to mismatch, or it can cause historical nodes to load
    // NOTE: different instances of the "same" segment.
    final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryProcessingPool(), toolbox.getJoinableFactory(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getCachePopulatorStats(), toolbox.getJsonMapper());
    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    LookupNodeService lookupNodeService = getContextValue(CTX_KEY_LOOKUP_TIER) == null ? toolbox.getLookupNodeService() : new LookupNodeService((String) getContextValue(CTX_KEY_LOOKUP_TIER));
    DiscoveryDruidNode discoveryDruidNode = new DiscoveryDruidNode(toolbox.getDruidNode(), NodeRole.PEON, ImmutableMap.of(toolbox.getDataNodeService().getName(), toolbox.getDataNodeService(), lookupNodeService.getName(), lookupNodeService));
    try {
        toolbox.getDataSegmentServerAnnouncer().announce();
        toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        plumber.startJob();
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        // Time to read data!
        while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
            Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
        }
    } catch (Throwable e) {
        normalExit = false;
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        throw e;
    } finally {
        if (normalExit) {
            try {
                // Persist if we had actually started.
                if (firehose != null) {
                    log.info("Persisting remaining data.");
                    final Committer committer = committerSupplier.get();
                    final CountDownLatch persistLatch = new CountDownLatch(1);
                    plumber.persist(new Committer() {

                        @Override
                        public Object getMetadata() {
                            return committer.getMetadata();
                        }

                        @Override
                        public void run() {
                            try {
                                committer.run();
                            } finally {
                                persistLatch.countDown();
                            }
                        }
                    });
                    persistLatch.await();
                }
                if (gracefullyStopped) {
                    log.info("Gracefully stopping.");
                } else {
                    log.info("Finishing the job.");
                    synchronized (this) {
                        if (gracefullyStopped) {
                            // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                            log.info("Gracefully stopping.");
                        } else {
                            finishingJob = true;
                        }
                    }
                    if (finishingJob) {
                        plumber.finishJob();
                    }
                }
            } catch (InterruptedException e) {
                log.debug(e, "Interrupted while finishing the job");
            } catch (Exception e) {
                log.makeAlert(e, "Failed to finish realtime task").emit();
                throw e;
            } finally {
                if (firehose != null) {
                    CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
                }
                toolbox.removeMonitor(metricsMonitor);
            }
        }
        toolbox.getDataSegmentServerAnnouncer().unannounce();
        toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
    }
    log.info("Job done!");
    return TaskStatus.success(getId());
}
Also used : RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) DataSegment(org.apache.druid.timeline.DataSegment) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) SegmentPublisher(org.apache.druid.segment.realtime.SegmentPublisher) TaskLock(org.apache.druid.indexing.common.TaskLock) ISE(org.apache.druid.java.util.common.ISE) IOException(java.io.IOException) PlumberSchool(org.apache.druid.segment.realtime.plumber.PlumberSchool) RealtimePlumberSchool(org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool) LookupNodeService(org.apache.druid.discovery.LookupNodeService) RealtimeTuningConfig(org.apache.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) LockReleaseAction(org.apache.druid.indexing.common.actions.LockReleaseAction) IOException(java.io.IOException) DataSchema(org.apache.druid.segment.indexing.DataSchema) VersioningPolicy(org.apache.druid.segment.realtime.plumber.VersioningPolicy) RealtimePlumberSchool(org.apache.druid.segment.realtime.plumber.RealtimePlumberSchool) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) RealtimeMetricsMonitor(org.apache.druid.segment.realtime.RealtimeMetricsMonitor) Committer(org.apache.druid.data.input.Committer) Interval(org.joda.time.Interval)

Example 13 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTask method persistAndWait.

private void persistAndWait(StreamAppenderatorDriver driver, Committer committer) {
    try {
        final CountDownLatch persistLatch = new CountDownLatch(1);
        driver.persist(new Committer() {

            @Override
            public Object getMetadata() {
                return committer.getMetadata();
            }

            @Override
            public void run() {
                try {
                    committer.run();
                } finally {
                    persistLatch.countDown();
                }
            }
        });
        persistLatch.await();
    } catch (InterruptedException e) {
        log.debug(e, "Interrupted while finishing the job");
    } catch (Exception e) {
        log.makeAlert(e, "Failed to finish realtime task").emit();
        throw e;
    }
}
Also used : Committer(org.apache.druid.data.input.Committer) CountDownLatch(java.util.concurrent.CountDownLatch) TimeoutException(java.util.concurrent.TimeoutException) ParseException(org.apache.druid.java.util.common.parsers.ParseException) IOException(java.io.IOException) ExecutionException(java.util.concurrent.ExecutionException)

Example 14 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class AppenderatorDriverRealtimeIndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) {
    runThread = Thread.currentThread();
    authorizerMapper = toolbox.getAuthorizerMapper();
    rowIngestionMeters = toolbox.getRowIngestionMetersFactory().createRowIngestionMeters();
    parseExceptionHandler = new ParseExceptionHandler(rowIngestionMeters, spec.getTuningConfig().isLogParseExceptions(), spec.getTuningConfig().getMaxParseExceptions(), spec.getTuningConfig().getMaxSavedParseExceptions());
    setupTimeoutAlert();
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeAppenderatorTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(toolbox.getPersistDir());
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null), null);
    final TaskRealtimeMetricsMonitor metricsMonitor = TaskRealtimeMetricsMonitorBuilder.build(this, fireDepartmentForMetrics, rowIngestionMeters);
    this.metrics = fireDepartmentForMetrics.getMetrics();
    final Supplier<Committer> committerSupplier = Committers.nilSupplier();
    DiscoveryDruidNode discoveryDruidNode = createDiscoveryDruidNode(toolbox);
    appenderator = newAppenderator(dataSchema, tuningConfig, metrics, toolbox);
    TaskLockType lockType = getContextValue(Tasks.USE_SHARED_LOCK, false) ? TaskLockType.SHARED : TaskLockType.EXCLUSIVE;
    StreamAppenderatorDriver driver = newDriver(dataSchema, appenderator, toolbox, metrics, lockType);
    try {
        log.debug("Found chat handler of class[%s]", toolbox.getChatHandlerProvider().getClass().getName());
        toolbox.getChatHandlerProvider().register(getId(), this, false);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().announce();
            toolbox.getDruidNodeAnnouncer().announce(discoveryDruidNode);
        }
        driver.startJob(segmentId -> {
            try {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    return toolbox.getTaskActionClient().submit(new SegmentLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), segmentId.getVersion(), segmentId.getShardSpec().getPartitionNum(), 1000L)).isOk();
                } else {
                    final TaskLock lock = toolbox.getTaskActionClient().submit(new TimeChunkLockAcquireAction(TaskLockType.EXCLUSIVE, segmentId.getInterval(), 1000L));
                    if (lock == null) {
                        return false;
                    }
                    if (lock.isRevoked()) {
                        throw new ISE(StringUtils.format("Lock for interval [%s] was revoked.", segmentId.getInterval()));
                    }
                    return true;
                }
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        });
        // Set up metrics emission
        toolbox.addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        int sequenceNumber = 0;
        String sequenceName = makeSequenceName(getId(), sequenceNumber);
        final TransactionalSegmentPublisher publisher = (mustBeNullOrEmptyOverwriteSegments, mustBeNullOrEmptyDropSegments, segments, commitMetadata) -> {
            if (mustBeNullOrEmptyOverwriteSegments != null && !mustBeNullOrEmptyOverwriteSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to overwrite segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyOverwriteSegments));
            }
            if (mustBeNullOrEmptyDropSegments != null && !mustBeNullOrEmptyDropSegments.isEmpty()) {
                throw new ISE("Stream ingestion task unexpectedly attempted to drop segments: %s", SegmentUtils.commaSeparatedIdentifiers(mustBeNullOrEmptyDropSegments));
            }
            final SegmentTransactionalInsertAction action = SegmentTransactionalInsertAction.appendAction(segments, null, null);
            return toolbox.getTaskActionClient().submit(action);
        };
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(Preconditions.checkNotNull(spec.getDataSchema().getParser(), "inputRowParser"), toolbox.getIndexingTmpDir());
            }
        }
        ingestionState = IngestionState.BUILD_SEGMENTS;
        // Time to read data!
        while (!gracefullyStopped && firehoseDrainableByClosing && firehose.hasMore()) {
            try {
                InputRow inputRow = firehose.nextRow();
                if (inputRow == null) {
                    log.debug("Discarded null row, considering thrownAway.");
                    rowIngestionMeters.incrementThrownAway();
                } else {
                    AppenderatorDriverAddResult addResult = driver.add(inputRow, sequenceName, committerSupplier);
                    if (addResult.isOk()) {
                        final boolean isPushRequired = addResult.isPushRequired(tuningConfig.getPartitionsSpec().getMaxRowsPerSegment(), tuningConfig.getPartitionsSpec().getMaxTotalRowsOr(DynamicPartitionsSpec.DEFAULT_MAX_TOTAL_ROWS));
                        if (isPushRequired) {
                            publishSegments(driver, publisher, committerSupplier, sequenceName);
                            sequenceNumber++;
                            sequenceName = makeSequenceName(getId(), sequenceNumber);
                        }
                    } else {
                        // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                }
            } catch (ParseException e) {
                handleParseException(e);
            }
        }
        ingestionState = IngestionState.COMPLETED;
        if (!gracefullyStopped) {
            synchronized (this) {
                if (gracefullyStopped) {
                    // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                    log.info("Gracefully stopping.");
                } else {
                    finishingJob = true;
                }
            }
            if (finishingJob) {
                log.info("Finishing job...");
                // Publish any remaining segments
                publishSegments(driver, publisher, committerSupplier, sequenceName);
                waitForSegmentPublishAndHandoff(tuningConfig.getPublishAndHandoffTimeout());
            }
        } else if (firehose != null) {
            log.info("Task was gracefully stopped, will persist data before exiting");
            persistAndWait(driver, committerSupplier.get());
        }
    } catch (Throwable e) {
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        errorMsg = Throwables.getStackTraceAsString(e);
        toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
        return TaskStatus.failure(getId(), errorMsg);
    } finally {
        toolbox.getChatHandlerProvider().unregister(getId());
        CloseableUtils.closeAndSuppressExceptions(firehose, e -> log.warn("Failed to close Firehose"));
        appenderator.close();
        CloseableUtils.closeAndSuppressExceptions(driver, e -> log.warn("Failed to close AppenderatorDriver"));
        toolbox.removeMonitor(metricsMonitor);
        if (toolbox.getAppenderatorsManager().shouldTaskMakeNodeAnnouncements()) {
            toolbox.getDataSegmentServerAnnouncer().unannounce();
            toolbox.getDruidNodeAnnouncer().unannounce(discoveryDruidNode);
        }
    }
    log.info("Job done!");
    toolbox.getTaskReportFileWriter().write(getId(), getTaskCompletionReports());
    return TaskStatus.success(getId());
}
Also used : StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) TaskReport(org.apache.druid.indexing.common.TaskReport) TaskToolbox(org.apache.druid.indexing.common.TaskToolbox) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) LookupNodeService(org.apache.druid.discovery.LookupNodeService) Produces(javax.ws.rs.Produces) AuthorizerMapper(org.apache.druid.server.security.AuthorizerMapper) Path(javax.ws.rs.Path) TimeoutException(java.util.concurrent.TimeoutException) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Timer(java.util.Timer) IngestionState(org.apache.druid.indexer.IngestionState) NoopQueryRunner(org.apache.druid.query.NoopQueryRunner) MonotonicNonNull(org.checkerframework.checker.nullness.qual.MonotonicNonNull) MediaType(javax.ws.rs.core.MediaType) ChatHandler(org.apache.druid.segment.realtime.firehose.ChatHandler) TaskActionClient(org.apache.druid.indexing.common.actions.TaskActionClient) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) Map(java.util.Map) TaskLock(org.apache.druid.indexing.common.TaskLock) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) QueryRunner(org.apache.druid.query.QueryRunner) TimerTask(java.util.TimerTask) DateTimes(org.apache.druid.java.util.common.DateTimes) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) Context(javax.ws.rs.core.Context) RealtimeAppenderatorIngestionSpec(org.apache.druid.indexing.common.index.RealtimeAppenderatorIngestionSpec) ImmutableMap(com.google.common.collect.ImmutableMap) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) IngestionStatsAndErrorsTaskReportData(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReportData) StringUtils(org.apache.druid.java.util.common.StringUtils) ISE(org.apache.druid.java.util.common.ISE) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) Action(org.apache.druid.server.security.Action) LockGranularity(org.apache.druid.indexing.common.LockGranularity) TaskRealtimeMetricsMonitorBuilder(org.apache.druid.indexing.common.TaskRealtimeMetricsMonitorBuilder) InputRow(org.apache.druid.data.input.InputRow) CountDownLatch(java.util.concurrent.CountDownLatch) Firehose(org.apache.druid.data.input.Firehose) List(java.util.List) Response(javax.ws.rs.core.Response) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TaskLockType(org.apache.druid.indexing.common.TaskLockType) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) NodeRole(org.apache.druid.discovery.NodeRole) SegmentAllocateAction(org.apache.druid.indexing.common.actions.SegmentAllocateAction) Queue(java.util.Queue) ConcurrentLinkedQueue(java.util.concurrent.ConcurrentLinkedQueue) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ActionBasedSegmentAllocator(org.apache.druid.indexing.appenderator.ActionBasedSegmentAllocator) NumberedPartialShardSpec(org.apache.druid.timeline.partition.NumberedPartialShardSpec) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) GET(javax.ws.rs.GET) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) ParseException(org.apache.druid.java.util.common.parsers.ParseException) Supplier(com.google.common.base.Supplier) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) HashMap(java.util.HashMap) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) TaskStatus(org.apache.druid.indexer.TaskStatus) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) HttpServletRequest(javax.servlet.http.HttpServletRequest) Query(org.apache.druid.query.Query) JsonIgnore(com.fasterxml.jackson.annotation.JsonIgnore) SegmentsAndCommitMetadata(org.apache.druid.segment.realtime.appenderator.SegmentsAndCommitMetadata) Appenderator(org.apache.druid.segment.realtime.appenderator.Appenderator) ActionBasedUsedSegmentChecker(org.apache.druid.indexing.appenderator.ActionBasedUsedSegmentChecker) ParseExceptionReport(org.apache.druid.segment.incremental.ParseExceptionReport) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) Throwables(com.google.common.base.Throwables) Committers(org.apache.druid.segment.realtime.plumber.Committers) IOException(java.io.IOException) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) Futures(com.google.common.util.concurrent.Futures) IngestionStatsAndErrorsTaskReport(org.apache.druid.indexing.common.IngestionStatsAndErrorsTaskReport) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) SegmentUtils(org.apache.druid.segment.SegmentUtils) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) AsyncFunction(com.google.common.util.concurrent.AsyncFunction) VisibleForTesting(com.google.common.annotations.VisibleForTesting) StreamAppenderatorDriver(org.apache.druid.segment.realtime.appenderator.StreamAppenderatorDriver) DataSchema(org.apache.druid.segment.indexing.DataSchema) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections) RealtimeIOConfig(org.apache.druid.segment.indexing.RealtimeIOConfig) EventReceiverFirehoseFactory(org.apache.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(org.apache.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(org.apache.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) SegmentTransactionalInsertAction(org.apache.druid.indexing.common.actions.SegmentTransactionalInsertAction) SegmentLockAcquireAction(org.apache.druid.indexing.common.actions.SegmentLockAcquireAction) FireDepartment(org.apache.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(org.apache.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) TaskLock(org.apache.druid.indexing.common.TaskLock) RealtimeAppenderatorTuningConfig(org.apache.druid.indexing.common.index.RealtimeAppenderatorTuningConfig) ISE(org.apache.druid.java.util.common.ISE) TaskRealtimeMetricsMonitor(org.apache.druid.indexing.common.stats.TaskRealtimeMetricsMonitor) IOException(java.io.IOException) AppenderatorDriverAddResult(org.apache.druid.segment.realtime.appenderator.AppenderatorDriverAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) DiscoveryDruidNode(org.apache.druid.discovery.DiscoveryDruidNode) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) TaskLockType(org.apache.druid.indexing.common.TaskLockType) InputRow(org.apache.druid.data.input.InputRow) TimeChunkLockAcquireAction(org.apache.druid.indexing.common.actions.TimeChunkLockAcquireAction) Committer(org.apache.druid.data.input.Committer) ParseException(org.apache.druid.java.util.common.parsers.ParseException)

Example 15 with Committer

use of org.apache.druid.data.input.Committer in project druid by druid-io.

the class AppenderatorImpl method push.

@Override
public ListenableFuture<SegmentsAndCommitMetadata> push(final Collection<SegmentIdWithShardSpec> identifiers, @Nullable final Committer committer, final boolean useUniquePath) {
    final Map<SegmentIdWithShardSpec, Sink> theSinks = new HashMap<>();
    AtomicLong pushedHydrantsCount = new AtomicLong();
    for (final SegmentIdWithShardSpec identifier : identifiers) {
        final Sink sink = sinks.get(identifier);
        if (sink == null) {
            throw new ISE("No sink for identifier: %s", identifier);
        }
        theSinks.put(identifier, sink);
        if (sink.finishWriting()) {
            totalRows.addAndGet(-sink.getNumRows());
        }
        // count hydrants for stats:
        pushedHydrantsCount.addAndGet(Iterables.size(sink));
    }
    return Futures.transform(// segments.
    persistAll(committer), (Function<Object, SegmentsAndCommitMetadata>) commitMetadata -> {
        final List<DataSegment> dataSegments = new ArrayList<>();
        log.info("Preparing to push (stats): processed rows: [%d], sinks: [%d], fireHydrants (across sinks): [%d]", rowIngestionMeters.getProcessed(), theSinks.size(), pushedHydrantsCount.get());
        log.debug("Building and pushing segments: %s", theSinks.keySet().stream().map(SegmentIdWithShardSpec::toString).collect(Collectors.joining(", ")));
        for (Map.Entry<SegmentIdWithShardSpec, Sink> entry : theSinks.entrySet()) {
            if (droppingSinks.contains(entry.getKey())) {
                log.warn("Skipping push of currently-dropping sink[%s]", entry.getKey());
                continue;
            }
            final DataSegment dataSegment = mergeAndPush(entry.getKey(), entry.getValue(), useUniquePath);
            if (dataSegment != null) {
                dataSegments.add(dataSegment);
            } else {
                log.warn("mergeAndPush[%s] returned null, skipping.", entry.getKey());
            }
        }
        log.info("Push complete...");
        return new SegmentsAndCommitMetadata(dataSegments, commitMetadata);
    }, pushExecutor);
}
Also used : DataSegmentAnnouncer(org.apache.druid.server.coordination.DataSegmentAnnouncer) Arrays(java.util.Arrays) FireDepartmentMetrics(org.apache.druid.segment.realtime.FireDepartmentMetrics) Pair(org.apache.druid.java.util.common.Pair) FileLock(java.nio.channels.FileLock) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) QueryRunner(org.apache.druid.query.QueryRunner) IAE(org.apache.druid.java.util.common.IAE) FileUtils(org.apache.druid.java.util.common.FileUtils) DateTimes(org.apache.druid.java.util.common.DateTimes) Function(com.google.common.base.Function) Execs(org.apache.druid.java.util.common.concurrent.Execs) IdentityHashMap(java.util.IdentityHashMap) Closer(org.apache.druid.java.util.common.io.Closer) Collection(java.util.Collection) QueryableIndex(org.apache.druid.segment.QueryableIndex) StandardOpenOption(java.nio.file.StandardOpenOption) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) Collectors(java.util.stream.Collectors) Sets(com.google.common.collect.Sets) InputRow(org.apache.druid.data.input.InputRow) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) SegmentId(org.apache.druid.timeline.SegmentId) MutableLong(org.apache.commons.lang.mutable.MutableLong) QueryableIndexSegment(org.apache.druid.segment.QueryableIndexSegment) Joiner(com.google.common.base.Joiner) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) DataSegmentPusher(org.apache.druid.segment.loading.DataSegmentPusher) Iterables(com.google.common.collect.Iterables) MoreExecutors(com.google.common.util.concurrent.MoreExecutors) ParseExceptionHandler(org.apache.druid.segment.incremental.ParseExceptionHandler) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) Stopwatch(com.google.common.base.Stopwatch) Supplier(com.google.common.base.Supplier) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) HashMap(java.util.HashMap) Callable(java.util.concurrent.Callable) RowIngestionMeters(org.apache.druid.segment.incremental.RowIngestionMeters) ArrayList(java.util.ArrayList) ConcurrentMap(java.util.concurrent.ConcurrentMap) BaseProgressIndicator(org.apache.druid.segment.BaseProgressIndicator) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) Query(org.apache.druid.query.Query) Sink(org.apache.druid.segment.realtime.plumber.Sink) RetryUtils(org.apache.druid.java.util.common.RetryUtils) QuerySegmentWalker(org.apache.druid.query.QuerySegmentWalker) Nullable(javax.annotation.Nullable) Segment(org.apache.druid.segment.Segment) EmittingLogger(org.apache.druid.java.util.emitter.EmittingLogger) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ReentrantLock(java.util.concurrent.locks.ReentrantLock) RE(org.apache.druid.java.util.common.RE) IndexMerger(org.apache.druid.segment.IndexMerger) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) FireHydrant(org.apache.druid.segment.realtime.FireHydrant) IOException(java.io.IOException) Ints(com.google.common.primitives.Ints) ReferenceCountingSegment(org.apache.druid.segment.ReferenceCountingSegment) FutureCallback(com.google.common.util.concurrent.FutureCallback) File(java.io.File) ExecutionException(java.util.concurrent.ExecutionException) TimeUnit(java.util.concurrent.TimeUnit) AtomicLong(java.util.concurrent.atomic.AtomicLong) Futures(com.google.common.util.concurrent.Futures) Lock(java.util.concurrent.locks.Lock) Closeable(java.io.Closeable) Committer(org.apache.druid.data.input.Committer) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) SegmentDescriptor(org.apache.druid.query.SegmentDescriptor) Cache(org.apache.druid.client.cache.Cache) IndexIO(org.apache.druid.segment.IndexIO) IncrementalIndexAddResult(org.apache.druid.segment.incremental.IncrementalIndexAddResult) DataSchema(org.apache.druid.segment.indexing.DataSchema) FileChannel(java.nio.channels.FileChannel) Collections(java.util.Collections) IdentityHashMap(java.util.IdentityHashMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) DataSegment(org.apache.druid.timeline.DataSegment) AtomicLong(java.util.concurrent.atomic.AtomicLong) Sink(org.apache.druid.segment.realtime.plumber.Sink) ISE(org.apache.druid.java.util.common.ISE) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList)

Aggregations

Committer (org.apache.druid.data.input.Committer)22 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)13 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)11 Test (org.junit.Test)11 Supplier (com.google.common.base.Supplier)8 IOException (java.io.IOException)8 List (java.util.List)7 ISE (org.apache.druid.java.util.common.ISE)7 VisibleForTesting (com.google.common.annotations.VisibleForTesting)6 Preconditions (com.google.common.base.Preconditions)6 Futures (com.google.common.util.concurrent.Futures)6 ListenableFuture (com.google.common.util.concurrent.ListenableFuture)6 File (java.io.File)6 ArrayList (java.util.ArrayList)6 HashMap (java.util.HashMap)6 Map (java.util.Map)6 CountDownLatch (java.util.concurrent.CountDownLatch)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)5 Function (com.google.common.base.Function)5 Lists (com.google.common.collect.Lists)5