Example 1 with RealtimeMetricsMonitor

Use of io.druid.segment.realtime.RealtimeMetricsMonitor in project druid by druid-io.

From the class KafkaIndexTask, method run. The task wraps a metrics-only FireDepartment in a RealtimeMetricsMonitor and registers it with the toolbox's MonitorScheduler before entering its Kafka read loop:

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    log.info("Starting up!");
    startTime = DateTime.now();
    mapper = toolbox.getObjectMapper();
    status = Status.STARTING;
    if (chatHandlerProvider.isPresent()) {
        log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName());
        chatHandlerProvider.get().register(getId(), this, false);
    } else {
        log.warn("No chat handler detected");
    }
    runThread = Thread.currentThread();
    // Set up FireDepartmentMetrics
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
    try (final Appenderator appenderator0 = newAppenderator(fireDepartmentMetrics, toolbox);
        final FiniteAppenderatorDriver driver = newDriver(appenderator0, toolbox, fireDepartmentMetrics);
        final KafkaConsumer<byte[], byte[]> consumer = newConsumer()) {
        appenderator = appenderator0;
        final String topic = ioConfig.getStartPartitions().getTopic();
        // Start up, set up initial offsets.
        final Object restoredMetadata = driver.startJob();
        if (restoredMetadata == null) {
            nextOffsets.putAll(ioConfig.getStartPartitions().getPartitionOffsetMap());
        } else {
            final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
            final KafkaPartitions restoredNextPartitions = toolbox.getObjectMapper().convertValue(restoredMetadataMap.get(METADATA_NEXT_PARTITIONS), KafkaPartitions.class);
            nextOffsets.putAll(restoredNextPartitions.getPartitionOffsetMap());
            // Sanity checks.
            if (!restoredNextPartitions.getTopic().equals(ioConfig.getStartPartitions().getTopic())) {
                throw new ISE("WTF?! Restored topic[%s] but expected topic[%s]", restoredNextPartitions.getTopic(), ioConfig.getStartPartitions().getTopic());
            }
            if (!nextOffsets.keySet().equals(ioConfig.getStartPartitions().getPartitionOffsetMap().keySet())) {
                throw new ISE("WTF?! Restored partitions[%s] but expected partitions[%s]", nextOffsets.keySet(), ioConfig.getStartPartitions().getPartitionOffsetMap().keySet());
            }
        }
        // Set up sequenceNames.
        final Map<Integer, String> sequenceNames = Maps.newHashMap();
        for (Integer partitionNum : nextOffsets.keySet()) {
            sequenceNames.put(partitionNum, String.format("%s_%s", ioConfig.getBaseSequenceName(), partitionNum));
        }
        // Set up committer.
        final Supplier<Committer> committerSupplier = new Supplier<Committer>() {

            @Override
            public Committer get() {
                final Map<Integer, Long> snapshot = ImmutableMap.copyOf(nextOffsets);
                return new Committer() {

                    @Override
                    public Object getMetadata() {
                        return ImmutableMap.of(METADATA_NEXT_PARTITIONS, new KafkaPartitions(ioConfig.getStartPartitions().getTopic(), snapshot));
                    }

                    @Override
                    public void run() {
                    // Do nothing.
                    }
                };
            }
        };
        Set<Integer> assignment = assignPartitionsAndSeekToNext(consumer, topic);
        // Main loop.
        // Could eventually support leader/follower mode (for keeping replicas more in sync)
        boolean stillReading = !assignment.isEmpty();
        status = Status.READING;
        try {
            while (stillReading) {
                if (possiblyPause(assignment)) {
                    // The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
                    // partitions upon resuming. This is safe even if the end offsets have not been modified.
                    assignment = assignPartitionsAndSeekToNext(consumer, topic);
                    if (assignment.isEmpty()) {
                        log.info("All partitions have been fully read");
                        publishOnStop = true;
                        stopRequested = true;
                    }
                }
                if (stopRequested) {
                    break;
                }
                // The retrying business is because the KafkaConsumer throws OffsetOutOfRangeException if the seeked-to
                // offset is not present in the topic-partition. This can happen if we're asking a task to read from data
                // that has not been written yet (which is totally legitimate). So let's wait for it to show up.
                ConsumerRecords<byte[], byte[]> records = ConsumerRecords.empty();
                try {
                    records = consumer.poll(POLL_TIMEOUT);
                } catch (OffsetOutOfRangeException e) {
                    log.warn("OffsetOutOfRangeException with message [%s]", e.getMessage());
                    possiblyResetOffsetsOrWait(e.offsetOutOfRangePartitions(), consumer, toolbox);
                    stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
                }
                for (ConsumerRecord<byte[], byte[]> record : records) {
                    if (log.isTraceEnabled()) {
                        log.trace("Got topic[%s] partition[%d] offset[%,d].", record.topic(), record.partition(), record.offset());
                    }
                    if (record.offset() < endOffsets.get(record.partition())) {
                        if (record.offset() != nextOffsets.get(record.partition())) {
                            throw new ISE("WTF?! Got offset[%,d] after offset[%,d] in partition[%d].", record.offset(), nextOffsets.get(record.partition()), record.partition());
                        }
                        try {
                            final byte[] valueBytes = record.value();
                            if (valueBytes == null) {
                                throw new ParseException("null value");
                            }
                            final InputRow row = Preconditions.checkNotNull(parser.parse(ByteBuffer.wrap(valueBytes)), "row");
                            if (!ioConfig.getMinimumMessageTime().isPresent() || !ioConfig.getMinimumMessageTime().get().isAfter(row.getTimestamp())) {
                                final SegmentIdentifier identifier = driver.add(row, sequenceNames.get(record.partition()), committerSupplier);
                                if (identifier == null) {
                                    // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                                    throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
                                }
                                fireDepartmentMetrics.incrementProcessed();
                            } else {
                                fireDepartmentMetrics.incrementThrownAway();
                            }
                        } catch (ParseException e) {
                            if (tuningConfig.isReportParseExceptions()) {
                                throw e;
                            } else {
                                log.debug(e, "Dropping unparseable row from partition[%d] offset[%,d].", record.partition(), record.offset());
                                fireDepartmentMetrics.incrementUnparseable();
                            }
                        }
                        nextOffsets.put(record.partition(), record.offset() + 1);
                    }
                    if (nextOffsets.get(record.partition()).equals(endOffsets.get(record.partition())) && assignment.remove(record.partition())) {
                        log.info("Finished reading topic[%s], partition[%,d].", record.topic(), record.partition());
                        assignPartitions(consumer, topic, assignment);
                        stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
                    }
                }
            }
        } finally {
            // persist pending data
            driver.persist(committerSupplier.get());
        }
        synchronized (statusLock) {
            if (stopRequested && !publishOnStop) {
                throw new InterruptedException("Stopping without publishing");
            }
            status = Status.PUBLISHING;
        }
        final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {

            @Override
            public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
                final KafkaPartitions finalPartitions = toolbox.getObjectMapper().convertValue(((Map) commitMetadata).get(METADATA_NEXT_PARTITIONS), KafkaPartitions.class);
                // Sanity check, we should only be publishing things that match our desired end state.
                if (!endOffsets.equals(finalPartitions.getPartitionOffsetMap())) {
                    throw new ISE("WTF?! Driver attempted to publish invalid metadata[%s].", commitMetadata);
                }
                final SegmentTransactionalInsertAction action;
                if (ioConfig.isUseTransaction()) {
                    action = new SegmentTransactionalInsertAction(segments, new KafkaDataSourceMetadata(ioConfig.getStartPartitions()), new KafkaDataSourceMetadata(finalPartitions));
                } else {
                    action = new SegmentTransactionalInsertAction(segments, null, null);
                }
                log.info("Publishing with isTransaction[%s].", ioConfig.isUseTransaction());
                return toolbox.getTaskActionClient().submit(action).isSuccess();
            }
        };
        final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
        if (published == null) {
            throw new ISE("Transaction failure publishing segments, aborting");
        } else {
            log.info("Published segments[%s] with metadata[%s].", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {

                @Override
                public String apply(DataSegment input) {
                    return input.getIdentifier();
                }
            })), published.getCommitMetadata());
        }
    } catch (InterruptedException | RejectedExecutionException e) {
        // handle the InterruptedException that gets wrapped in a RejectedExecutionException
        if (e instanceof RejectedExecutionException && (e.getCause() == null || !(e.getCause() instanceof InterruptedException))) {
            throw e;
        }
        // if we were interrupted because we were asked to stop, handle the exception and return success, else rethrow
        if (!stopRequested) {
            Thread.currentThread().interrupt();
            throw e;
        }
        log.info("The task was asked to stop before completing");
    } finally {
        if (chatHandlerProvider.isPresent()) {
            chatHandlerProvider.get().unregister(getId());
        }
    }
    return success();
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) Set(java.util.Set) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) SegmentTransactionalInsertAction(io.druid.indexing.common.actions.SegmentTransactionalInsertAction) DataSegment(io.druid.timeline.DataSegment) FireDepartment(io.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ISE(io.druid.java.util.common.ISE) Supplier(com.google.common.base.Supplier) SegmentsAndMetadata(io.druid.segment.realtime.appenderator.SegmentsAndMetadata) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) Appenderator(io.druid.segment.realtime.appenderator.Appenderator) FiniteAppenderatorDriver(io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver) InputRow(io.druid.data.input.InputRow) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) ParseException(io.druid.java.util.common.parsers.ParseException) OffsetOutOfRangeException(org.apache.kafka.clients.consumer.OffsetOutOfRangeException) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)
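
Stripped of the Kafka-specific control flow, the monitoring setup in this example reduces to the short sketch below. It is not a separate method in the source; the identifiers (toolbox, dataSchema, getId()) and types are the ones used in the run method above.

    // Build a FireDepartment purely for metrics bookkeeping: this task drives ingestion
    // itself, so the RealtimeIOConfig is deliberately empty.
    final FireDepartment fireDepartmentForMetrics =
        new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();

    // Wrap the FireDepartment in a RealtimeMetricsMonitor and register it with the
    // MonitorScheduler so its counters are emitted periodically, tagged with the task id.
    toolbox.getMonitorScheduler().addMonitor(
        new RealtimeMetricsMonitor(
            ImmutableList.of(fireDepartmentForMetrics),
            ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })
        )
    );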

Example 2 with RealtimeMetricsMonitor

Use of io.druid.segment.realtime.RealtimeMetricsMonitor in project druid by druid-io.

From the class IndexTask, method generateAndPublishSegments. The batch index task builds the same metrics-only FireDepartment, but only registers the monitor when the toolbox actually provides a MonitorScheduler:

private boolean generateAndPublishSegments(final TaskToolbox toolbox, final DataSchema dataSchema, final Map<Interval, List<ShardSpec>> shardSpecs, final String version, final FirehoseFactory firehoseFactory) throws IOException, InterruptedException {
    final GranularitySpec granularitySpec = dataSchema.getGranularitySpec();
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    final FireDepartmentMetrics fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    final Map<String, ShardSpec> sequenceNameToShardSpecMap = Maps.newHashMap();
    if (toolbox.getMonitorScheduler() != null) {
        toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
    }
    final SegmentAllocator segmentAllocator;
    if (ingestionSchema.getIOConfig().isAppendToExisting()) {
        segmentAllocator = new ActionBasedSegmentAllocator(toolbox.getTaskActionClient(), dataSchema);
    } else {
        segmentAllocator = new SegmentAllocator() {

            @Override
            public SegmentIdentifier allocate(DateTime timestamp, String sequenceName, String previousSegmentId) throws IOException {
                Optional<Interval> interval = granularitySpec.bucketInterval(timestamp);
                if (!interval.isPresent()) {
                    throw new ISE("Could not find interval for timestamp [%s]", timestamp);
                }
                ShardSpec shardSpec = sequenceNameToShardSpecMap.get(sequenceName);
                if (shardSpec == null) {
                    throw new ISE("Could not find ShardSpec for sequenceName [%s]", sequenceName);
                }
                return new SegmentIdentifier(getDataSource(), interval.get(), version, shardSpec);
            }
        };
    }
    try (final Appenderator appenderator = newAppenderator(fireDepartmentMetrics, toolbox, dataSchema);
        final FiniteAppenderatorDriver driver = newDriver(appenderator, toolbox, segmentAllocator, fireDepartmentMetrics);
        final Firehose firehose = firehoseFactory.connect(dataSchema.getParser())) {
        final Supplier<Committer> committerSupplier = Committers.supplierFromFirehose(firehose);
        final Map<Interval, ShardSpecLookup> shardSpecLookups = Maps.newHashMap();
        if (driver.startJob() != null) {
            driver.clear();
        }
        try {
            while (firehose.hasMore()) {
                try {
                    final InputRow inputRow = firehose.nextRow();
                    final Optional<Interval> optInterval = granularitySpec.bucketInterval(inputRow.getTimestamp());
                    if (!optInterval.isPresent()) {
                        fireDepartmentMetrics.incrementThrownAway();
                        continue;
                    }
                    final Interval interval = optInterval.get();
                    if (!shardSpecLookups.containsKey(interval)) {
                        final List<ShardSpec> intervalShardSpecs = shardSpecs.get(interval);
                        if (intervalShardSpecs == null || intervalShardSpecs.isEmpty()) {
                            throw new ISE("Failed to get shardSpec for interval[%s]", interval);
                        }
                        shardSpecLookups.put(interval, intervalShardSpecs.get(0).getLookup(intervalShardSpecs));
                    }
                    final ShardSpec shardSpec = shardSpecLookups.get(interval).getShardSpec(inputRow.getTimestampFromEpoch(), inputRow);
                    final String sequenceName = String.format("index_%s_%s_%d", interval, version, shardSpec.getPartitionNum());
                    if (!sequenceNameToShardSpecMap.containsKey(sequenceName)) {
                        final ShardSpec shardSpecForPublishing = ingestionSchema.getTuningConfig().isForceExtendableShardSpecs() || ingestionSchema.getIOConfig().isAppendToExisting() ? new NumberedShardSpec(shardSpec.getPartitionNum(), shardSpecs.get(interval).size()) : shardSpec;
                        sequenceNameToShardSpecMap.put(sequenceName, shardSpecForPublishing);
                    }
                    final SegmentIdentifier identifier = driver.add(inputRow, sequenceName, committerSupplier);
                    if (identifier == null) {
                        throw new ISE("Could not allocate segment for row with timestamp[%s]", inputRow.getTimestamp());
                    }
                    fireDepartmentMetrics.incrementProcessed();
                } catch (ParseException e) {
                    if (ingestionSchema.getTuningConfig().isReportParseExceptions()) {
                        throw e;
                    } else {
                        fireDepartmentMetrics.incrementUnparseable();
                    }
                }
            }
        } finally {
            driver.persist(committerSupplier.get());
        }
        final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {

            @Override
            public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
                final SegmentTransactionalInsertAction action = new SegmentTransactionalInsertAction(segments, null, null);
                return toolbox.getTaskActionClient().submit(action).isSuccess();
            }
        };
        final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
        if (published == null) {
            log.error("Failed to publish segments, aborting!");
            return false;
        } else {
            log.info("Published segments[%s]", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {

                @Override
                public String apply(DataSegment input) {
                    return input.getIdentifier();
                }
            })));
            return true;
        }
    }
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) SortedSet(java.util.SortedSet) Set(java.util.Set) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) ShardSpecLookup(io.druid.timeline.partition.ShardSpecLookup) SegmentTransactionalInsertAction(io.druid.indexing.common.actions.SegmentTransactionalInsertAction) DataSegment(io.druid.timeline.DataSegment) NoneShardSpec(io.druid.timeline.partition.NoneShardSpec) ShardSpec(io.druid.timeline.partition.ShardSpec) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) DateTime(org.joda.time.DateTime) FireDepartment(io.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ActionBasedSegmentAllocator(io.druid.indexing.appenderator.ActionBasedSegmentAllocator) ISE(io.druid.java.util.common.ISE) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(io.druid.timeline.partition.HashBasedNumberedShardSpec) Optional(com.google.common.base.Optional) Firehose(io.druid.data.input.Firehose) SegmentsAndMetadata(io.druid.segment.realtime.appenderator.SegmentsAndMetadata) IOException(java.io.IOException) FireDepartmentMetrics(io.druid.segment.realtime.FireDepartmentMetrics) Appenderator(io.druid.segment.realtime.appenderator.Appenderator) GranularitySpec(io.druid.segment.indexing.granularity.GranularitySpec) ActionBasedSegmentAllocator(io.druid.indexing.appenderator.ActionBasedSegmentAllocator) SegmentAllocator(io.druid.segment.realtime.appenderator.SegmentAllocator) FiniteAppenderatorDriver(io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver) InputRow(io.druid.data.input.InputRow) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) ParseException(io.druid.java.util.common.parsers.ParseException) Interval(org.joda.time.Interval)
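
Both appenderator-based tasks above drive the FireDepartmentMetrics counters the same way: a row accepted by the driver increments processed, rows filtered out (by minimumMessageTime in the Kafka task, or by the granularity buckets in IndexTask) increment thrownAway, and a ParseException either aborts the task or increments unparseable depending on the tuning config. Below is a minimal sketch of that shared parse-handling pattern; the helper name ingestRow and its signature (including the ByteBufferInputRowParser parameter) are invented for illustration and do not appear in the source.

    private void ingestRow(
        final ByteBuffer rawValue,
        final ByteBufferInputRowParser parser,
        final boolean reportParseExceptions,
        final FireDepartmentMetrics metrics
    )
    {
        try {
            if (rawValue == null) {
                // The Kafka task treats records with null values as parse errors.
                throw new ParseException("null value");
            }
            final InputRow row = parser.parse(rawValue);
            // driver.add(row, sequenceName, committerSupplier) would go here; it is elided
            // to keep the sketch focused on how the metrics counters are driven.
            metrics.incrementProcessed();
        } catch (ParseException e) {
            if (reportParseExceptions) {
                // Strict mode (tuningConfig.isReportParseExceptions()): rethrow and fail the task.
                throw e;
            }
            // Lenient mode: count the bad row and keep reading.
            metrics.incrementUnparseable();
        }
    }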

Example 3 with RealtimeMetricsMonitor

Use of io.druid.segment.realtime.RealtimeMetricsMonitor in project druid by druid-io.

From the class RealtimeIndexTask, method run. Here the FireDepartment is built from the task's real ingestion spec rather than an empty RealtimeIOConfig, and the monitor is added when the plumber's job starts and removed again during cleanup:

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    runThread = Thread.currentThread();
    if (this.plumber != null) {
        throw new IllegalStateException("WTF?!? run with non-null plumber??!");
    }
    boolean normalExit = true;
    // It would be nice to get the PlumberSchool in the constructor.  Although that will need jackson injectables for
    // stuff like the ServerView, which seems kind of odd?  Perhaps revisit this when Guice has been introduced.
    final SegmentPublisher segmentPublisher = new TaskActionSegmentPublisher(this, toolbox);
    // NOTE: We talk to the coordinator in various places in the plumber and we could be more robust to issues
    // with the coordinator.  Right now, we'll block/throw in whatever thread triggered the coordinator behavior,
    // which will typically be either the main data processing loop or the persist thread.
    // Wrap default DataSegmentAnnouncer such that we unlock intervals as we unannounce segments
    final DataSegmentAnnouncer lockingSegmentAnnouncer = new DataSegmentAnnouncer() {

        @Override
        public void announceSegment(final DataSegment segment) throws IOException {
            // Side effect: Calling announceSegment causes a lock to be acquired
            toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            toolbox.getSegmentAnnouncer().announceSegment(segment);
        }

        @Override
        public void unannounceSegment(final DataSegment segment) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegment(segment);
            } finally {
                toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
            }
        }

        @Override
        public void announceSegments(Iterable<DataSegment> segments) throws IOException {
            // Side effect: Calling announceSegments causes locks to be acquired
            for (DataSegment segment : segments) {
                toolbox.getTaskActionClient().submit(new LockAcquireAction(segment.getInterval()));
            }
            toolbox.getSegmentAnnouncer().announceSegments(segments);
        }

        @Override
        public void unannounceSegments(Iterable<DataSegment> segments) throws IOException {
            try {
                toolbox.getSegmentAnnouncer().unannounceSegments(segments);
            } finally {
                for (DataSegment segment : segments) {
                    toolbox.getTaskActionClient().submit(new LockReleaseAction(segment.getInterval()));
                }
            }
        }

        @Override
        public boolean isAnnounced(DataSegment segment) {
            return toolbox.getSegmentAnnouncer().isAnnounced(segment);
        }
    };
    // NOTE: getVersion will block if there is lock contention, which will block plumber.getSink
    // NOTE: (and thus the firehose)
    // Shouldn't usually happen, since we don't expect people to submit tasks that intersect with the
    // realtime window, but if they do it can be problematic. If we decide to care, we can use more threads in
    // the plumber such that waiting for the coordinator doesn't block data processing.
    final VersioningPolicy versioningPolicy = new VersioningPolicy() {

        @Override
        public String getVersion(final Interval interval) {
            try {
                // Side effect: Calling getVersion causes a lock to be acquired
                final TaskLock myLock = toolbox.getTaskActionClient().submit(new LockAcquireAction(interval));
                return myLock.getVersion();
            } catch (IOException e) {
                throw Throwables.propagate(e);
            }
        }
    };
    DataSchema dataSchema = spec.getDataSchema();
    RealtimeIOConfig realtimeIOConfig = spec.getIOConfig();
    RealtimeTuningConfig tuningConfig = spec.getTuningConfig().withBasePersistDirectory(new File(toolbox.getTaskWorkDir(), "persist")).withVersioningPolicy(versioningPolicy);
    final FireDepartment fireDepartment = new FireDepartment(dataSchema, realtimeIOConfig, tuningConfig);
    this.metrics = fireDepartment.getMetrics();
    final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(ImmutableList.of(fireDepartment), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() }));
    this.queryRunnerFactoryConglomerate = toolbox.getQueryRunnerFactoryConglomerate();
    // NOTE: This pusher selects path based purely on global configuration and the DataSegment, which means
    // NOTE: that redundant realtime tasks will upload to the same location. This can cause index.zip
    // NOTE: (partitionNum_index.zip for HDFS data storage) and descriptor.json (partitionNum_descriptor.json for
    // NOTE: HDFS data storage) to mismatch, or it can cause historical nodes to load different instances of
    // NOTE: the "same" segment.
    final PlumberSchool plumberSchool = new RealtimePlumberSchool(toolbox.getEmitter(), toolbox.getQueryRunnerFactoryConglomerate(), toolbox.getSegmentPusher(), lockingSegmentAnnouncer, segmentPublisher, toolbox.getSegmentHandoffNotifierFactory(), toolbox.getQueryExecutorService(), toolbox.getIndexMerger(), toolbox.getIndexMergerV9(), toolbox.getIndexIO(), toolbox.getCache(), toolbox.getCacheConfig(), toolbox.getObjectMapper());
    this.plumber = plumberSchool.findPlumber(dataSchema, tuningConfig, metrics);
    Supplier<Committer> committerSupplier = null;
    try {
        plumber.startJob();
        // Set up metrics emission
        toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
        // Delay firehose connection to avoid claiming input resources while the plumber is starting up.
        final FirehoseFactory firehoseFactory = spec.getIOConfig().getFirehoseFactory();
        final boolean firehoseDrainableByClosing = isFirehoseDrainableByClosing(firehoseFactory);
        // Skip connecting firehose if we've been stopped before we got started.
        synchronized (this) {
            if (!gracefullyStopped) {
                firehose = firehoseFactory.connect(spec.getDataSchema().getParser());
                committerSupplier = Committers.supplierFromFirehose(firehose);
            }
        }
        // Time to read data!
        while (firehose != null && (!gracefullyStopped || firehoseDrainableByClosing) && firehose.hasMore()) {
            Plumbers.addNextRow(committerSupplier, firehose, plumber, tuningConfig.isReportParseExceptions(), metrics);
        }
    } catch (Throwable e) {
        normalExit = false;
        log.makeAlert(e, "Exception aborted realtime processing[%s]", dataSchema.getDataSource()).emit();
        throw e;
    } finally {
        if (normalExit) {
            try {
                // Persist if we had actually started.
                if (firehose != null) {
                    log.info("Persisting remaining data.");
                    final Committer committer = committerSupplier.get();
                    final CountDownLatch persistLatch = new CountDownLatch(1);
                    plumber.persist(new Committer() {

                        @Override
                        public Object getMetadata() {
                            return committer.getMetadata();
                        }

                        @Override
                        public void run() {
                            try {
                                committer.run();
                            } finally {
                                persistLatch.countDown();
                            }
                        }
                    });
                    persistLatch.await();
                }
                if (gracefullyStopped) {
                    log.info("Gracefully stopping.");
                } else {
                    log.info("Finishing the job.");
                    synchronized (this) {
                        if (gracefullyStopped) {
                            // Someone called stopGracefully after we checked the flag. That's okay, just stop now.
                            log.info("Gracefully stopping.");
                        } else {
                            finishingJob = true;
                        }
                    }
                    if (finishingJob) {
                        plumber.finishJob();
                    }
                }
            } catch (InterruptedException e) {
                log.debug(e, "Interrupted while finishing the job");
            } catch (Exception e) {
                log.makeAlert(e, "Failed to finish realtime task").emit();
                throw e;
            } finally {
                if (firehose != null) {
                    CloseQuietly.close(firehose);
                }
                toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
            }
        }
    }
    log.info("Job done!");
    return TaskStatus.success(getId());
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) DataSegmentAnnouncer(io.druid.server.coordination.DataSegmentAnnouncer) EventReceiverFirehoseFactory(io.druid.segment.realtime.firehose.EventReceiverFirehoseFactory) ClippedFirehoseFactory(io.druid.segment.realtime.firehose.ClippedFirehoseFactory) TimedShutoffFirehoseFactory(io.druid.segment.realtime.firehose.TimedShutoffFirehoseFactory) FirehoseFactory(io.druid.data.input.FirehoseFactory) DataSegment(io.druid.timeline.DataSegment) FireDepartment(io.druid.segment.realtime.FireDepartment) SegmentPublisher(io.druid.segment.realtime.SegmentPublisher) TaskLock(io.druid.indexing.common.TaskLock) LockAcquireAction(io.druid.indexing.common.actions.LockAcquireAction) IOException(java.io.IOException) RealtimePlumberSchool(io.druid.segment.realtime.plumber.RealtimePlumberSchool) PlumberSchool(io.druid.segment.realtime.plumber.PlumberSchool) RealtimeTuningConfig(io.druid.segment.indexing.RealtimeTuningConfig) CountDownLatch(java.util.concurrent.CountDownLatch) LockReleaseAction(io.druid.indexing.common.actions.LockReleaseAction) IOException(java.io.IOException) DataSchema(io.druid.segment.indexing.DataSchema) VersioningPolicy(io.druid.segment.realtime.plumber.VersioningPolicy) RealtimePlumberSchool(io.druid.segment.realtime.plumber.RealtimePlumberSchool) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) File(java.io.File) Interval(org.joda.time.Interval)
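
Example 3 differs from the first two in how the monitor's lifetime is managed: it is added once the plumber's job has started and explicitly removed once the task winds down. Simplified to the monitor-related steps only (in the real method above, removal happens on the normal-exit path after persisting and finishing the job), the shape is roughly:

    final RealtimeMetricsMonitor metricsMonitor = new RealtimeMetricsMonitor(
        ImmutableList.of(fireDepartment),
        ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })
    );
    try {
        plumber.startJob();
        // Start emitting this task's ingestion metrics.
        toolbox.getMonitorScheduler().addMonitor(metricsMonitor);
        // ... connect the firehose and run the read loop (see the full method above) ...
    } finally {
        // Stop emitting once the task is done. The tasks in Examples 1 and 2 add their
        // monitor but never remove it explicitly.
        toolbox.getMonitorScheduler().removeMonitor(metricsMonitor);
    }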

Aggregations

Types shared across the examples above, with the number of examples in which each appears:

Committer (io.druid.data.input.Committer): 3
RealtimeIOConfig (io.druid.segment.indexing.RealtimeIOConfig): 3
FireDepartment (io.druid.segment.realtime.FireDepartment): 3
RealtimeMetricsMonitor (io.druid.segment.realtime.RealtimeMetricsMonitor): 3
DataSegment (io.druid.timeline.DataSegment): 3
InputRow (io.druid.data.input.InputRow): 2
SegmentTransactionalInsertAction (io.druid.indexing.common.actions.SegmentTransactionalInsertAction): 2
ISE (io.druid.java.util.common.ISE): 2
ParseException (io.druid.java.util.common.parsers.ParseException): 2
Appenderator (io.druid.segment.realtime.appenderator.Appenderator): 2
FiniteAppenderatorDriver (io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver): 2
SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier): 2
SegmentsAndMetadata (io.druid.segment.realtime.appenderator.SegmentsAndMetadata): 2
TransactionalSegmentPublisher (io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher): 2
IOException (java.io.IOException): 2
Set (java.util.Set): 2
Interval (org.joda.time.Interval): 2
Optional (com.google.common.base.Optional): 1
Supplier (com.google.common.base.Supplier): 1
ImmutableMap (com.google.common.collect.ImmutableMap): 1