Search in sources :

Example 81 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class InsertSegment method insertSegments.

private void insertSegments(final Set<DataSegment> segments) throws IOException {
    final Set<DataSegment> segmentsInserted = indexerMetadataStorageCoordinator.announceHistoricalSegments(segments);
    for (DataSegment dataSegment : segmentsInserted) {
        log.info("Sucessfully inserted Segment [%s] into metadata storage", dataSegment.getIdentifier());
    }
    final Set<DataSegment> segmentsAlreadyExist = Sets.difference(segments, segmentsInserted);
    if (!segmentsAlreadyExist.isEmpty()) {
        for (DataSegment dataSegment : segmentsAlreadyExist) {
            log.info("Segment [%s] already exists in metadata storage, updating the payload", dataSegment.getIdentifier());
        }
        indexerMetadataStorageCoordinator.updateSegmentMetadata(segmentsAlreadyExist);
    }
}
Also used : DataSegment(io.druid.timeline.DataSegment)

Example 82 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class DruidSchema method start.

@LifecycleStart
public void start() {
    cacheExec.submit(new Runnable() {

        @Override
        public void run() {
            try {
                while (!Thread.currentThread().isInterrupted()) {
                    final Set<String> dataSources = Sets.newHashSet();
                    try {
                        synchronized (lock) {
                            final long nextRefresh = new DateTime(lastRefresh).plus(config.getMetadataRefreshPeriod()).getMillis();
                            while (!(isServerViewInitialized && !dataSourcesNeedingRefresh.isEmpty() && (refreshImmediately || nextRefresh < System.currentTimeMillis()))) {
                                lock.wait(Math.max(1, nextRefresh - System.currentTimeMillis()));
                            }
                            dataSources.addAll(dataSourcesNeedingRefresh);
                            dataSourcesNeedingRefresh.clear();
                            lastRefresh = System.currentTimeMillis();
                            refreshImmediately = false;
                        }
                        // Refresh dataSources.
                        for (final String dataSource : dataSources) {
                            log.debug("Refreshing metadata for dataSource[%s].", dataSource);
                            final long startTime = System.currentTimeMillis();
                            final DruidTable druidTable = computeTable(dataSource);
                            if (druidTable == null) {
                                if (tables.remove(dataSource) != null) {
                                    log.info("Removed dataSource[%s] from the list of active dataSources.", dataSource);
                                }
                            } else {
                                tables.put(dataSource, druidTable);
                                log.info("Refreshed metadata for dataSource[%s] in %,dms.", dataSource, System.currentTimeMillis() - startTime);
                            }
                        }
                        initializationLatch.countDown();
                    } catch (InterruptedException e) {
                        // Fall through.
                        throw e;
                    } catch (Exception e) {
                        log.warn(e, "Metadata refresh failed for dataSources[%s], trying again soon.", Joiner.on(", ").join(dataSources));
                        synchronized (lock) {
                            // Add dataSources back to the refresh list.
                            dataSourcesNeedingRefresh.addAll(dataSources);
                            lock.notifyAll();
                        }
                    }
                }
            } catch (InterruptedException e) {
            // Just exit.
            } catch (Throwable e) {
                // Throwables that fall out to here (not caught by an inner try/catch) are potentially gnarly, like
                // OOMEs. Anyway, let's just emit an alert and stop refreshing metadata.
                log.makeAlert(e, "Metadata refresh failed permanently").emit();
                throw e;
            } finally {
                log.info("Metadata refresh stopped.");
            }
        }
    });
    serverView.registerSegmentCallback(MoreExecutors.sameThreadExecutor(), new ServerView.SegmentCallback() {

        @Override
        public ServerView.CallbackAction segmentViewInitialized() {
            synchronized (lock) {
                isServerViewInitialized = true;
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }

        @Override
        public ServerView.CallbackAction segmentAdded(DruidServerMetadata server, DataSegment segment) {
            synchronized (lock) {
                dataSourcesNeedingRefresh.add(segment.getDataSource());
                if (!tables.containsKey(segment.getDataSource())) {
                    refreshImmediately = true;
                }
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }

        @Override
        public ServerView.CallbackAction segmentRemoved(DruidServerMetadata server, DataSegment segment) {
            synchronized (lock) {
                dataSourcesNeedingRefresh.add(segment.getDataSource());
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }
    });
    serverView.registerServerCallback(MoreExecutors.sameThreadExecutor(), new ServerView.ServerCallback() {

        @Override
        public ServerView.CallbackAction serverRemoved(DruidServer server) {
            final List<String> dataSourceNames = Lists.newArrayList();
            for (DruidDataSource druidDataSource : server.getDataSources()) {
                dataSourceNames.add(druidDataSource.getName());
            }
            synchronized (lock) {
                dataSourcesNeedingRefresh.addAll(dataSourceNames);
                lock.notifyAll();
            }
            return ServerView.CallbackAction.CONTINUE;
        }
    });
}
Also used : EnumSet(java.util.EnumSet) Set(java.util.Set) DruidTable(io.druid.sql.calcite.table.DruidTable) DruidServer(io.druid.client.DruidServer) DruidServerMetadata(io.druid.server.coordination.DruidServerMetadata) DataSegment(io.druid.timeline.DataSegment) DruidDataSource(io.druid.client.DruidDataSource) DateTime(org.joda.time.DateTime) ServerView(io.druid.client.ServerView) TimelineServerView(io.druid.client.TimelineServerView) List(java.util.List) LifecycleStart(io.druid.java.util.common.lifecycle.LifecycleStart)

Example 83 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class SpecificSegmentsQuerySegmentWalker method add.

public SpecificSegmentsQuerySegmentWalker add(final DataSegment descriptor, final QueryableIndex index) {
    final Segment segment = new QueryableIndexSegment(descriptor.getIdentifier(), index);
    if (!timelines.containsKey(descriptor.getDataSource())) {
        timelines.put(descriptor.getDataSource(), new VersionedIntervalTimeline<String, Segment>(Ordering.natural()));
    }
    final VersionedIntervalTimeline<String, Segment> timeline = timelines.get(descriptor.getDataSource());
    timeline.add(descriptor.getInterval(), descriptor.getVersion(), descriptor.getShardSpec().createChunk(segment));
    segments.add(descriptor);
    closeables.add(index);
    return this;
}
Also used : QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) DataSegment(io.druid.timeline.DataSegment) Segment(io.druid.segment.Segment) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment)

Example 84 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class TestServerInventoryView method registerSegmentCallback.

@Override
public void registerSegmentCallback(Executor exec, final SegmentCallback callback) {
    final DruidServerMetadata dummyServer = new DruidServerMetadata("dummy", "dummy", 0, "dummy", "dummy", 0);
    for (final DataSegment segment : segments) {
        exec.execute(new Runnable() {

            @Override
            public void run() {
                callback.segmentAdded(dummyServer, segment);
            }
        });
    }
    exec.execute(new Runnable() {

        @Override
        public void run() {
            callback.segmentViewInitialized();
        }
    });
}
Also used : DruidServerMetadata(io.druid.server.coordination.DruidServerMetadata) DataSegment(io.druid.timeline.DataSegment)

Example 85 with DataSegment

use of io.druid.timeline.DataSegment in project druid by druid-io.

the class KafkaIndexTask method run.

@Override
public TaskStatus run(final TaskToolbox toolbox) throws Exception {
    log.info("Starting up!");
    startTime = DateTime.now();
    mapper = toolbox.getObjectMapper();
    status = Status.STARTING;
    if (chatHandlerProvider.isPresent()) {
        log.info("Found chat handler of class[%s]", chatHandlerProvider.get().getClass().getName());
        chatHandlerProvider.get().register(getId(), this, false);
    } else {
        log.warn("No chat handler detected");
    }
    runThread = Thread.currentThread();
    // Set up FireDepartmentMetrics
    final FireDepartment fireDepartmentForMetrics = new FireDepartment(dataSchema, new RealtimeIOConfig(null, null, null), null);
    fireDepartmentMetrics = fireDepartmentForMetrics.getMetrics();
    toolbox.getMonitorScheduler().addMonitor(new RealtimeMetricsMonitor(ImmutableList.of(fireDepartmentForMetrics), ImmutableMap.of(DruidMetrics.TASK_ID, new String[] { getId() })));
    try (final Appenderator appenderator0 = newAppenderator(fireDepartmentMetrics, toolbox);
        final FiniteAppenderatorDriver driver = newDriver(appenderator0, toolbox, fireDepartmentMetrics);
        final KafkaConsumer<byte[], byte[]> consumer = newConsumer()) {
        appenderator = appenderator0;
        final String topic = ioConfig.getStartPartitions().getTopic();
        // Start up, set up initial offsets.
        final Object restoredMetadata = driver.startJob();
        if (restoredMetadata == null) {
            nextOffsets.putAll(ioConfig.getStartPartitions().getPartitionOffsetMap());
        } else {
            final Map<String, Object> restoredMetadataMap = (Map) restoredMetadata;
            final KafkaPartitions restoredNextPartitions = toolbox.getObjectMapper().convertValue(restoredMetadataMap.get(METADATA_NEXT_PARTITIONS), KafkaPartitions.class);
            nextOffsets.putAll(restoredNextPartitions.getPartitionOffsetMap());
            // Sanity checks.
            if (!restoredNextPartitions.getTopic().equals(ioConfig.getStartPartitions().getTopic())) {
                throw new ISE("WTF?! Restored topic[%s] but expected topic[%s]", restoredNextPartitions.getTopic(), ioConfig.getStartPartitions().getTopic());
            }
            if (!nextOffsets.keySet().equals(ioConfig.getStartPartitions().getPartitionOffsetMap().keySet())) {
                throw new ISE("WTF?! Restored partitions[%s] but expected partitions[%s]", nextOffsets.keySet(), ioConfig.getStartPartitions().getPartitionOffsetMap().keySet());
            }
        }
        // Set up sequenceNames.
        final Map<Integer, String> sequenceNames = Maps.newHashMap();
        for (Integer partitionNum : nextOffsets.keySet()) {
            sequenceNames.put(partitionNum, String.format("%s_%s", ioConfig.getBaseSequenceName(), partitionNum));
        }
        // Set up committer.
        final Supplier<Committer> committerSupplier = new Supplier<Committer>() {

            @Override
            public Committer get() {
                final Map<Integer, Long> snapshot = ImmutableMap.copyOf(nextOffsets);
                return new Committer() {

                    @Override
                    public Object getMetadata() {
                        return ImmutableMap.of(METADATA_NEXT_PARTITIONS, new KafkaPartitions(ioConfig.getStartPartitions().getTopic(), snapshot));
                    }

                    @Override
                    public void run() {
                    // Do nothing.
                    }
                };
            }
        };
        Set<Integer> assignment = assignPartitionsAndSeekToNext(consumer, topic);
        // Main loop.
        // Could eventually support leader/follower mode (for keeping replicas more in sync)
        boolean stillReading = !assignment.isEmpty();
        status = Status.READING;
        try {
            while (stillReading) {
                if (possiblyPause(assignment)) {
                    // The partition assignments may have changed while paused by a call to setEndOffsets() so reassign
                    // partitions upon resuming. This is safe even if the end offsets have not been modified.
                    assignment = assignPartitionsAndSeekToNext(consumer, topic);
                    if (assignment.isEmpty()) {
                        log.info("All partitions have been fully read");
                        publishOnStop = true;
                        stopRequested = true;
                    }
                }
                if (stopRequested) {
                    break;
                }
                // The retrying business is because the KafkaConsumer throws OffsetOutOfRangeException if the seeked-to
                // offset is not present in the topic-partition. This can happen if we're asking a task to read from data
                // that has not been written yet (which is totally legitimate). So let's wait for it to show up.
                ConsumerRecords<byte[], byte[]> records = ConsumerRecords.empty();
                try {
                    records = consumer.poll(POLL_TIMEOUT);
                } catch (OffsetOutOfRangeException e) {
                    log.warn("OffsetOutOfRangeException with message [%s]", e.getMessage());
                    possiblyResetOffsetsOrWait(e.offsetOutOfRangePartitions(), consumer, toolbox);
                    stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
                }
                for (ConsumerRecord<byte[], byte[]> record : records) {
                    if (log.isTraceEnabled()) {
                        log.trace("Got topic[%s] partition[%d] offset[%,d].", record.topic(), record.partition(), record.offset());
                    }
                    if (record.offset() < endOffsets.get(record.partition())) {
                        if (record.offset() != nextOffsets.get(record.partition())) {
                            throw new ISE("WTF?! Got offset[%,d] after offset[%,d] in partition[%d].", record.offset(), nextOffsets.get(record.partition()), record.partition());
                        }
                        try {
                            final byte[] valueBytes = record.value();
                            if (valueBytes == null) {
                                throw new ParseException("null value");
                            }
                            final InputRow row = Preconditions.checkNotNull(parser.parse(ByteBuffer.wrap(valueBytes)), "row");
                            if (!ioConfig.getMinimumMessageTime().isPresent() || !ioConfig.getMinimumMessageTime().get().isAfter(row.getTimestamp())) {
                                final SegmentIdentifier identifier = driver.add(row, sequenceNames.get(record.partition()), committerSupplier);
                                if (identifier == null) {
                                    // If we allow continuing, then consider blacklisting the interval for a while to avoid constant checks.
                                    throw new ISE("Could not allocate segment for row with timestamp[%s]", row.getTimestamp());
                                }
                                fireDepartmentMetrics.incrementProcessed();
                            } else {
                                fireDepartmentMetrics.incrementThrownAway();
                            }
                        } catch (ParseException e) {
                            if (tuningConfig.isReportParseExceptions()) {
                                throw e;
                            } else {
                                log.debug(e, "Dropping unparseable row from partition[%d] offset[%,d].", record.partition(), record.offset());
                                fireDepartmentMetrics.incrementUnparseable();
                            }
                        }
                        nextOffsets.put(record.partition(), record.offset() + 1);
                    }
                    if (nextOffsets.get(record.partition()).equals(endOffsets.get(record.partition())) && assignment.remove(record.partition())) {
                        log.info("Finished reading topic[%s], partition[%,d].", record.topic(), record.partition());
                        assignPartitions(consumer, topic, assignment);
                        stillReading = ioConfig.isPauseAfterRead() || !assignment.isEmpty();
                    }
                }
            }
        } finally {
            // persist pending data
            driver.persist(committerSupplier.get());
        }
        synchronized (statusLock) {
            if (stopRequested && !publishOnStop) {
                throw new InterruptedException("Stopping without publishing");
            }
            status = Status.PUBLISHING;
        }
        final TransactionalSegmentPublisher publisher = new TransactionalSegmentPublisher() {

            @Override
            public boolean publishSegments(Set<DataSegment> segments, Object commitMetadata) throws IOException {
                final KafkaPartitions finalPartitions = toolbox.getObjectMapper().convertValue(((Map) commitMetadata).get(METADATA_NEXT_PARTITIONS), KafkaPartitions.class);
                // Sanity check, we should only be publishing things that match our desired end state.
                if (!endOffsets.equals(finalPartitions.getPartitionOffsetMap())) {
                    throw new ISE("WTF?! Driver attempted to publish invalid metadata[%s].", commitMetadata);
                }
                final SegmentTransactionalInsertAction action;
                if (ioConfig.isUseTransaction()) {
                    action = new SegmentTransactionalInsertAction(segments, new KafkaDataSourceMetadata(ioConfig.getStartPartitions()), new KafkaDataSourceMetadata(finalPartitions));
                } else {
                    action = new SegmentTransactionalInsertAction(segments, null, null);
                }
                log.info("Publishing with isTransaction[%s].", ioConfig.isUseTransaction());
                return toolbox.getTaskActionClient().submit(action).isSuccess();
            }
        };
        final SegmentsAndMetadata published = driver.finish(publisher, committerSupplier.get());
        if (published == null) {
            throw new ISE("Transaction failure publishing segments, aborting");
        } else {
            log.info("Published segments[%s] with metadata[%s].", Joiner.on(", ").join(Iterables.transform(published.getSegments(), new Function<DataSegment, String>() {

                @Override
                public String apply(DataSegment input) {
                    return input.getIdentifier();
                }
            })), published.getCommitMetadata());
        }
    } catch (InterruptedException | RejectedExecutionException e) {
        // handle the InterruptedException that gets wrapped in a RejectedExecutionException
        if (e instanceof RejectedExecutionException && (e.getCause() == null || !(e.getCause() instanceof InterruptedException))) {
            throw e;
        }
        // if we were interrupted because we were asked to stop, handle the exception and return success, else rethrow
        if (!stopRequested) {
            Thread.currentThread().interrupt();
            throw e;
        }
        log.info("The task was asked to stop before completing");
    } finally {
        if (chatHandlerProvider.isPresent()) {
            chatHandlerProvider.get().unregister(getId());
        }
    }
    return success();
}
Also used : RealtimeIOConfig(io.druid.segment.indexing.RealtimeIOConfig) Set(java.util.Set) SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) SegmentTransactionalInsertAction(io.druid.indexing.common.actions.SegmentTransactionalInsertAction) DataSegment(io.druid.timeline.DataSegment) FireDepartment(io.druid.segment.realtime.FireDepartment) TransactionalSegmentPublisher(io.druid.segment.realtime.appenderator.TransactionalSegmentPublisher) ISE(io.druid.java.util.common.ISE) Supplier(com.google.common.base.Supplier) SegmentsAndMetadata(io.druid.segment.realtime.appenderator.SegmentsAndMetadata) RejectedExecutionException(java.util.concurrent.RejectedExecutionException) Appenderator(io.druid.segment.realtime.appenderator.Appenderator) FiniteAppenderatorDriver(io.druid.segment.realtime.appenderator.FiniteAppenderatorDriver) InputRow(io.druid.data.input.InputRow) RealtimeMetricsMonitor(io.druid.segment.realtime.RealtimeMetricsMonitor) Committer(io.druid.data.input.Committer) ParseException(io.druid.java.util.common.parsers.ParseException) OffsetOutOfRangeException(org.apache.kafka.clients.consumer.OffsetOutOfRangeException) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap)

Aggregations

DataSegment (io.druid.timeline.DataSegment)293 Test (org.junit.Test)151 Interval (org.joda.time.Interval)136 File (java.io.File)56 DateTime (org.joda.time.DateTime)52 IOException (java.io.IOException)37 DruidServer (io.druid.client.DruidServer)36 Map (java.util.Map)35 DruidDataSource (io.druid.client.DruidDataSource)19 ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService)18 List (java.util.List)17 DefaultObjectMapper (io.druid.jackson.DefaultObjectMapper)16 Rule (io.druid.server.coordinator.rules.Rule)16 ImmutableMap (com.google.common.collect.ImmutableMap)15 ForeverLoadRule (io.druid.server.coordinator.rules.ForeverLoadRule)14 IntervalDropRule (io.druid.server.coordinator.rules.IntervalDropRule)13 IntervalLoadRule (io.druid.server.coordinator.rules.IntervalLoadRule)13 CountDownLatch (java.util.concurrent.CountDownLatch)13 GET (javax.ws.rs.GET)13 Produces (javax.ws.rs.Produces)13