Search in sources :

Example 16 with TimelineObjectHolder

use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.

the class IngestSegmentFirehoseFactory method connect.

/**
 * Opens a firehose that reads rows back out of the segments returned by {@code getTimeline()}.
 *
 * <p>Every segment in the timeline is first fetched through a {@link SegmentCacheManager} rooted at
 * {@code temporaryDirectory}; each fetched segment is then loaded as an index and wrapped in a
 * {@link WindowedStorageAdapter} restricted to the interval of the timeline holder it came from.
 *
 * @param inputRowParser parser whose parse spec supplies the dimensions spec and the transform spec
 * @param temporaryDirectory directory handed to the segment cache manager factory
 * @return an {@link IngestSegmentFirehose} over all adapters, in timeline order
 * @throws RuntimeException wrapping a {@link SegmentLoadingException} or {@link IOException} raised while
 *                          fetching or loading a segment
 */
@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException {
    log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();

    // Fetch every segment up front. This needs enough local storage for all of them at once, even
    // though IngestSegmentFirehose walks the segments one after another. Lazy fetching (possibly
    // sharing code with PrefetchableTextFilesFirehoseFactory) would be a future improvement.
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
    for (TimelineObjectHolder<String, DataSegment> timelineHolder : timeLineSegments) {
        for (PartitionChunk<DataSegment> partitionChunk : timelineHolder.getObject()) {
            // A segment may appear in several holders; fetch it only the first time it is seen.
            segmentFileMap.computeIfAbsent(partitionChunk.getObject(), toFetch -> {
                try {
                    return segmentCacheManager.getSegmentFiles(toFetch);
                } catch (SegmentLoadingException e) {
                    throw new RuntimeException(e);
                }
            });
        }
    }

    final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(dimensions, inputRowParser.getParseSpec().getDimensionsSpec(), timeLineSegments);

    // Explicitly configured metrics win; otherwise derive them from the segments themselves.
    final List<String> metricsList;
    if (metrics != null) {
        metricsList = metrics;
    } else {
        metricsList = ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments);
    }

    // One adapter per (holder, chunk) pair, windowed to the holder's interval.
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList();
    for (TimelineObjectHolder<String, DataSegment> timelineHolder : timeLineSegments) {
        for (PartitionChunk<DataSegment> partitionChunk : timelineHolder.getObject()) {
            final DataSegment segment = partitionChunk.getObject();
            try {
                final File segmentFile = Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId());
                adapters.add(new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(segmentFile)), timelineHolder.getInterval()));
            } catch (IOException e) {
                throw new RuntimeException(e);
            }
        }
    }

    return new IngestSegmentFirehose(adapters, TransformSpec.fromInputRowParser(inputRowParser), dims, metricsList, dimFilter);
}
Also used : IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) Function(com.google.common.base.Function) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) File(java.io.File) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)

Example 17 with TimelineObjectHolder

use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.

the class DruidInputSource method createSplits.

/**
 * Groups the segments of {@code dataSource} that fall in {@code interval} into input splits.
 *
 * <p>A {@link SegmentsSplitHintSpec} is first converted to an equivalent {@link MaxSizeSplitHintSpec}
 * built from its max-bytes-per-task and max-num-segments settings; any other hint spec is used as given.
 *
 * @param coordinatorClient client passed through to the timeline lookup
 * @param retryPolicyFactory retry policy factory passed through to the timeline lookup
 * @param dataSource datasource whose segments are being split
 * @param interval interval for which the timeline is looked up
 * @param splitHintSpec hint describing how segments should be grouped
 * @return an iterator of splits, each wrapping one list of windowed segment ids produced by the hint spec
 */
public static Iterator<InputSplit<List<WindowedSegmentId>>> createSplits(CoordinatorClient coordinatorClient, RetryPolicyFactory retryPolicyFactory, String dataSource, Interval interval, SplitHintSpec splitHintSpec) {
    // Default to the caller's hint spec; only the segments-based spec needs translating.
    SplitHintSpec effectiveSplitHintSpec = splitHintSpec;
    if (splitHintSpec instanceof SegmentsSplitHintSpec) {
        final SegmentsSplitHintSpec segmentsSpec = (SegmentsSplitHintSpec) splitHintSpec;
        effectiveSplitHintSpec = new MaxSizeSplitHintSpec(segmentsSpec.getMaxInputSegmentBytesPerTask(), segmentsSpec.getMaxNumSegments());
    }

    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = getTimelineForInterval(coordinatorClient, retryPolicyFactory, dataSource, interval);
    final Map<WindowedSegmentId, Long> segmentIdToSize = createWindowedSegmentIdFromTimeline(timelineSegments);

    // Let the hint spec group the segment ids, sizing each one by its entry in segmentIdToSize.
    // noinspection ConstantConditions
    final Iterator<List<WindowedSegmentId>> groupedSegmentIds = effectiveSplitHintSpec.split(
        segmentIdToSize.keySet().iterator(),
        id -> new InputFileAttribute(Preconditions.checkNotNull(segmentIdToSize.get(id), "segment size for [%s]", id))
    );
    return Iterators.transform(groupedSegmentIds, InputSplit::new);
}
Also used : JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Comparators(org.apache.druid.java.util.common.guava.Comparators) AbstractInputSource(org.apache.druid.data.input.AbstractInputSource) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) FluentIterable(com.google.common.collect.FluentIterable) Map(java.util.Map) InputSourceReader(org.apache.druid.data.input.InputSourceReader) IAE(org.apache.druid.java.util.common.IAE) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) SplittableInputSource(org.apache.druid.data.input.impl.SplittableInputSource) ISE(org.apache.druid.java.util.common.ISE) Objects(java.util.Objects) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) Stream(java.util.stream.Stream) DimFilter(org.apache.druid.query.filter.DimFilter) DataSegment(org.apache.druid.timeline.DataSegment) SortedMap(java.util.SortedMap) Logger(org.apache.druid.java.util.common.logger.Logger) Streams(org.apache.druid.utils.Streams) InputSplit(org.apache.druid.data.input.InputSplit) Duration(org.joda.time.Duration) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) HashMap(java.util.HashMap) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) 
WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputFileAttribute(org.apache.druid.data.input.InputFileAttribute) Nullable(javax.annotation.Nullable) RetryPolicy(org.apache.druid.indexing.common.RetryPolicy) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Iterator(java.util.Iterator) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) File(java.io.File) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader) TreeMap(java.util.TreeMap) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Preconditions(com.google.common.base.Preconditions) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) Comparator(java.util.Comparator) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) InputFileAttribute(org.apache.druid.data.input.InputFileAttribute) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) InputSplit(org.apache.druid.data.input.InputSplit) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec)

Example 18 with TimelineObjectHolder

use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.

the class DruidInputSource method fixedFormatReader.

/**
 * Creates a reader over every segment chunk in the timeline returned by {@code createTimeline()}.
 *
 * <p>Each chunk becomes a {@link DruidSegmentInputEntity} bound to the interval of its timeline holder;
 * entities are produced lazily as the returned reader consumes the iterator.
 *
 * <p>When {@code taskConfig.isIgnoreTimestampSpecForDruidInputSource()} is set, the supplied timestamp
 * spec is replaced by one that reads the {@code __time} column with the first standard time-column
 * format, and that column is added to the columns filter.
 *
 * @param inputRowSchema schema requested by the caller
 * @param temporaryDirectory directory used for the segment cache and passed to the reader; may be null
 * @return a reader over all segment entities using the (possibly overridden) schema
 */
@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();

    // Flatten holder -> chunks lazily; each entity remembers the holder's interval.
    // noinspection ConstantConditions
    final FluentIterable<DruidSegmentInputEntity> segmentEntities = FluentIterable.from(timeline).transformAndConcat(
        timelineHolder -> FluentIterable.from(timelineHolder.getObject()).transform(
            partitionChunk -> new DruidSegmentInputEntity(segmentCacheManager, partitionChunk.getObject(), timelineHolder.getInterval())
        )
    );
    final Iterator<DruidSegmentInputEntity> entityIterator = segmentEntities.iterator();
    final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);

    // Start from the caller's schema and override it only in legacy compatibility mode.
    InputRowSchema inputRowSchemaToUse = inputRowSchema;
    if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
        // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
        LOG.warn("Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with " + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
        final TimestampSpec timeColumnSpec = new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null);
        inputRowSchemaToUse = new InputRowSchema(timeColumnSpec, inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME));
    }

    final TimestampSpec effectiveTimestampSpec = inputRowSchemaToUse.getTimestampSpec();
    if (ColumnHolder.TIME_COLUMN_NAME.equals(effectiveTimestampSpec.getTimestampColumn())) {
        if (!STANDARD_TIME_COLUMN_FORMATS.contains(effectiveTimestampSpec.getTimestampFormat())) {
            // Reading __time with a non-standard format is probably a mistake, though it could be deliberate.
            LOG.warn("The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the " + "column as-is, switch formats.", effectiveTimestampSpec.getTimestampColumn(), STANDARD_TIME_COLUMN_FORMATS);
        }
    }
    return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
Also used : SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) JsonProperty(com.fasterxml.jackson.annotation.JsonProperty) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) TaskConfig(org.apache.druid.indexing.common.config.TaskConfig) Comparators(org.apache.druid.java.util.common.guava.Comparators) AbstractInputSource(org.apache.druid.data.input.AbstractInputSource) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) FluentIterable(com.google.common.collect.FluentIterable) Map(java.util.Map) InputSourceReader(org.apache.druid.data.input.InputSourceReader) IAE(org.apache.druid.java.util.common.IAE) JacksonInject(com.fasterxml.jackson.annotation.JacksonInject) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) InputFormat(org.apache.druid.data.input.InputFormat) Collection(java.util.Collection) SplitHintSpec(org.apache.druid.data.input.SplitHintSpec) SplittableInputSource(org.apache.druid.data.input.impl.SplittableInputSource) ISE(org.apache.druid.java.util.common.ISE) Objects(java.util.Objects) MaxSizeSplitHintSpec(org.apache.druid.data.input.MaxSizeSplitHintSpec) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) Stream(java.util.stream.Stream) DimFilter(org.apache.druid.query.filter.DimFilter) DataSegment(org.apache.druid.timeline.DataSegment) SortedMap(java.util.SortedMap) Logger(org.apache.druid.java.util.common.logger.Logger) Streams(org.apache.druid.utils.Streams) InputSplit(org.apache.druid.data.input.InputSplit) Duration(org.joda.time.Duration) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) HashMap(java.util.HashMap) InputRowSchema(org.apache.druid.data.input.InputRowSchema) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) 
ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) ThreadLocalRandom(java.util.concurrent.ThreadLocalRandom) InputFileAttribute(org.apache.druid.data.input.InputFileAttribute) Nullable(javax.annotation.Nullable) RetryPolicy(org.apache.druid.indexing.common.RetryPolicy) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) Iterator(java.util.Iterator) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) File(java.io.File) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader) TreeMap(java.util.TreeMap) JsonCreator(com.fasterxml.jackson.annotation.JsonCreator) JsonInclude(com.fasterxml.jackson.annotation.JsonInclude) Preconditions(com.google.common.base.Preconditions) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) Comparator(java.util.Comparator) IndexIO(org.apache.druid.segment.IndexIO) Collections(java.util.Collections) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader)

Example 19 with TimelineObjectHolder

use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinator method createNewSegment.

/**
 * Creates the identifier of a new segment for the given datasource and interval.
 *
 * <p>The new identifier's version and partition number are chosen to be consistent with the segments
 * already visible in the interval and with the pending segments for it, so that the new identifier
 * does not clash with existing pending, used, or unused segments.
 *
 * <p>Outline of the steps taken here:
 * <ol>
 *   <li>Look up the timeline chunks for {@code interval}; give up if there is more than one.</li>
 *   <li>Find the highest partition number among visible segments that share the partition space of
 *       {@code partialShardSpec}.</li>
 *   <li>Merge in the pending segments and recompute the maximum, restricted to the existing chunk's
 *       version when there is one.</li>
 *   <li>Pick the version (existing chunk's version, else the max pending id's version, else
 *       {@code existingVersion}) and build the new identifier.</li>
 * </ol>
 *
 * @param handle Database handle
 * @param dataSource datasource for the new segment
 * @param interval interval for the new segment
 * @param partialShardSpec Shard spec info minus segment id stuff
 * @param existingVersion Version of segments in interval, used to compute the version of the very first segment in
 *                        interval
 * @return the new segment identifier, or {@code null} when allocation is refused: more than one existing
 *         chunk in the interval, a conflicting max id (different interval, or version greater than
 *         {@code existingVersion}), or a max id whose number of core partitions is unknown
 * @throws IOException declared by the signature; nothing in this body throws it directly, so it is
 *                     presumably propagated from the helper calls made here (TODO confirm)
 */
@Nullable
private SegmentIdWithShardSpec createNewSegment(final Handle handle, final String dataSource, final Interval interval, final PartialShardSpec partialShardSpec, final String existingVersion) throws IOException {
    // Get the time chunk and associated data segments for the given interval, if any
    final List<TimelineObjectHolder<String, DataSegment>> existingChunks = getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);
    if (existingChunks.size() > 1) {
        // Not possible to expand more than one chunk with a single segment.
        log.warn("Cannot allocate new segment for dataSource[%s], interval[%s]: already have [%,d] chunks.", dataSource, interval, existingChunks.size());
        return null;
    } else {
        // max partitionId of the shardSpecs which share the same partition space.
        SegmentIdWithShardSpec maxId = null;
        if (!existingChunks.isEmpty()) {
            // At most one chunk can be present here because of the size check above.
            TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
            // noinspection ConstantConditions
            for (DataSegment segment : FluentIterable.from(existingHolder.getObject()).transform(PartitionChunk::getObject).filter(segment -> segment.getShardSpec().sharePartitionSpace(partialShardSpec))) {
                // Note that this will compute the max id of existing, visible, data segments in the time chunk:
                if (maxId == null || maxId.getShardSpec().getPartitionNum() < segment.getShardSpec().getPartitionNum()) {
                    maxId = SegmentIdWithShardSpec.fromDataSegment(segment);
                }
            }
        }
        // Get the version of the existing chunk; we might need it in some of the cases below
        // to compute the new identifier's version.
        @Nullable final String versionOfExistingChunk;
        if (!existingChunks.isEmpty()) {
            // Remember: only one chunk is possible for the given interval, so get the first and only one.
            versionOfExistingChunk = existingChunks.get(0).getVersion();
        } else {
            versionOfExistingChunk = null;
        }
        // Next, we need to enrich the maxId computed above with the information of the pending segments.
        // It is possible that a pending segment has a higher id, in which case we need that one; it will work,
        // and it will avoid clashes when inserting the new pending segment later in the caller of this method.
        final Set<SegmentIdWithShardSpec> pendings = getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
        // Make sure we add the maxId we obtained from the segments table:
        if (maxId != null) {
            pendings.add(maxId);
        }
        // Now compute the maxId with all the information: pendings + segments.
        // The versionOfExistingChunk filter ensures that we pick the max id with the version of the existing chunk
        // in the case that there may be a pending segment with a higher version but no corresponding used segments,
        // which may generate a clash with an existing segment once the new id is generated.
        // Ordering used by max(): version first, then partition number.
        maxId = pendings.stream().filter(id -> id.getShardSpec().sharePartitionSpace(partialShardSpec)).filter(id -> versionOfExistingChunk == null ? true : id.getVersion().equals(versionOfExistingChunk)).max((id1, id2) -> {
            final int versionCompare = id1.getVersion().compareTo(id2.getVersion());
            if (versionCompare != 0) {
                return versionCompare;
            } else {
                return Integer.compare(id1.getShardSpec().getPartitionNum(), id2.getShardSpec().getPartitionNum());
            }
        }).orElse(null);
        // The following code attempts to compute the new version. If this
        // new version is not null at the end of the next block, then it will be
        // used as the new version for the initial or appended segment.
        final String newSegmentVersion;
        if (versionOfExistingChunk != null) {
            // segment version overrides, so pick that now that we know it exists
            newSegmentVersion = versionOfExistingChunk;
        } else if (!pendings.isEmpty() && maxId != null) {
            // there are no visible segments in the time chunk, so pick the maxId of pendings, as computed above
            newSegmentVersion = maxId.getVersion();
        } else {
            // no segments, no pendings, so this must be the very first segment created for this interval
            newSegmentVersion = null;
        }
        if (maxId == null) {
            // When appending segments, null maxId means that we are allocating the very initial
            // segment for this time chunk.
            // This code is executed when the Overlord coordinates segment allocation, which is either when you append
            // segments or when you use segment lock. Since the core partitions set is not determined for appended
            // segments, we set it to 0. When you use segment lock, the core partitions set doesn't work with it. We
            // simply set it to 0 so that the OvershadowableManager handles the atomic segment update.
            final int newPartitionId = partialShardSpec.useNonRootGenerationPartitionSpace() ? PartitionIds.NON_ROOT_GEN_START_PARTITION_ID : PartitionIds.ROOT_GEN_START_PARTITION_ID;
            // Fall back to the caller-supplied version when neither a chunk nor a pending id provided one.
            String version = newSegmentVersion == null ? existingVersion : newSegmentVersion;
            return new SegmentIdWithShardSpec(dataSource, interval, version, partialShardSpec.complete(jsonMapper, newPartitionId, 0));
        } else if (!maxId.getInterval().equals(interval) || maxId.getVersion().compareTo(existingVersion) > 0) {
            // The max id belongs to a different interval or to a version newer than the caller's: refuse.
            log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], existingVersion[%s]: conflicting segment[%s].", dataSource, interval, existingVersion, maxId);
            return null;
        } else if (maxId.getShardSpec().getNumCorePartitions() == SingleDimensionShardSpec.UNKNOWN_NUM_CORE_PARTITIONS) {
            log.warn("Cannot allocate new segment because of unknown core partition size of segment[%s], shardSpec[%s]", maxId, maxId.getShardSpec());
            return null;
        } else {
            // Append after the max id: next partition number, same number of core partitions.
            return new SegmentIdWithShardSpec(dataSource, maxId.getInterval(), Preconditions.checkNotNull(newSegmentVersion, "newSegmentVersion"), partialShardSpec.complete(jsonMapper, maxId.getShardSpec().getPartitionNum() + 1, maxId.getShardSpec().getNumCorePartitions()));
        }
    }
}
Also used : Arrays(java.util.Arrays) Partitions(org.apache.druid.timeline.Partitions) Inject(com.google.inject.Inject) TransactionStatus(org.skife.jdbi.v2.TransactionStatus) LifecycleStart(org.apache.druid.java.util.common.lifecycle.LifecycleStart) StatementContext(org.skife.jdbi.v2.StatementContext) Pair(org.apache.druid.java.util.common.Pair) FluentIterable(com.google.common.collect.FluentIterable) DataSourceMetadata(org.apache.druid.indexing.overlord.DataSourceMetadata) ResultSet(java.sql.ResultSet) Map(java.util.Map) PreparedBatch(org.skife.jdbi.v2.PreparedBatch) IAE(org.apache.druid.java.util.common.IAE) CloseableIterator(org.apache.druid.java.util.common.parsers.CloseableIterator) ByteArrayMapper(org.skife.jdbi.v2.util.ByteArrayMapper) DateTimes(org.apache.druid.java.util.common.DateTimes) ImmutableSet(com.google.common.collect.ImmutableSet) JacksonUtils(org.apache.druid.java.util.common.jackson.JacksonUtils) SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) NotNull(javax.validation.constraints.NotNull) Collectors(java.util.stream.Collectors) List(java.util.List) PartitionIds(org.apache.druid.timeline.partition.PartitionIds) IndexerMetadataStorageCoordinator(org.apache.druid.indexing.overlord.IndexerMetadataStorageCoordinator) DataSegment(org.apache.druid.timeline.DataSegment) ISOChronology(org.joda.time.chrono.ISOChronology) PartialShardSpec(org.apache.druid.timeline.partition.PartialShardSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) Iterables(com.google.common.collect.Iterables) Intervals(org.apache.druid.java.util.common.Intervals) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) Hashing(com.google.common.hash.Hashing) ArrayList(java.util.ArrayList) 
HashSet(java.util.HashSet) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) Lists(com.google.common.collect.Lists) ImmutableList(com.google.common.collect.ImmutableList) ResultIterator(org.skife.jdbi.v2.ResultIterator) Nullable(javax.annotation.Nullable) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) BaseEncoding(com.google.common.io.BaseEncoding) HandleCallback(org.skife.jdbi.v2.tweak.HandleCallback) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DateTime(org.joda.time.DateTime) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) Query(org.skife.jdbi.v2.Query) IOException(java.io.IOException) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) Handle(org.skife.jdbi.v2.Handle) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) SegmentUtils(org.apache.druid.segment.SegmentUtils) TransactionCallback(org.skife.jdbi.v2.TransactionCallback) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Collections(java.util.Collections) StringEscapeUtils(org.apache.commons.lang.StringEscapeUtils) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) SegmentIdWithShardSpec(org.apache.druid.segment.realtime.appenderator.SegmentIdWithShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) Nullable(javax.annotation.Nullable) Nullable(javax.annotation.Nullable)

Example 20 with TimelineObjectHolder

use of org.apache.druid.timeline.TimelineObjectHolder in project druid by druid-io.

the class IndexerSQLMetadataStorageCoordinator method announceHistoricalSegments.

/**
 * Publishes {@code segments} inside a retried metadata-store transaction, optionally updating
 * datasource metadata and dropping other segments in the same transaction.
 *
 * <p>Within the transaction: if {@code startMetadata} is non-null the datasource metadata is updated
 * first; if {@code segmentsToDrop} is non-empty those segments are dropped next; finally the segments
 * are inserted. A non-successful metadata update or drop marks the transaction rollback-only and
 * aborts it with an exception.
 *
 * @param segments segments to publish; must be non-empty and all from one datasource
 * @param segmentsToDrop segments to drop in the same transaction; ignored when null or empty
 * @param startMetadata expected starting metadata, or null to skip the metadata update
 * @param endMetadata metadata to commit; must be null exactly when {@code startMetadata} is null
 * @return an ok result carrying the inserted segments, or a failed result when the transaction failed
 *         and it is certain that nothing was updated
 * @throws IllegalArgumentException if {@code segments} is empty, spans several datasources, or only
 *                                  one of the two metadata arguments is null
 * @throws CallbackFailedException if the transaction failed and it is not certain that nothing was updated
 */
@Override
public SegmentPublishResult announceHistoricalSegments(final Set<DataSegment> segments, final Set<DataSegment> segmentsToDrop, @Nullable final DataSourceMetadata startMetadata, @Nullable final DataSourceMetadata endMetadata) throws IOException {
    if (segments.isEmpty()) {
        throw new IllegalArgumentException("segment set must not be empty");
    }
    final String dataSource = segments.iterator().next().getDataSource();
    if (segments.stream().anyMatch(candidate -> !dataSource.equals(candidate.getDataSource()))) {
        throw new IllegalArgumentException("segments must all be from the same dataSource");
    }
    // Exactly one of the two being null is the only invalid combination.
    if ((startMetadata == null) != (endMetadata == null)) {
        throw new IllegalArgumentException("start/end metadata pair must be either null or non-null");
    }

    // Find which segments are used (i.e. not overshadowed).
    final Set<DataSegment> usedSegments = new HashSet<>();
    for (TimelineObjectHolder<String, DataSegment> timelineHolder : VersionedIntervalTimeline.forSegments(segments).lookupWithIncompletePartitions(Intervals.ETERNITY)) {
        for (PartitionChunk<DataSegment> partitionChunk : timelineHolder.getObject()) {
            usedSegments.add(partitionChunk.getObject());
        }
    }

    final AtomicBoolean definitelyNotUpdated = new AtomicBoolean(false);
    try {
        return connector.retryTransaction((handle, transactionStatus) -> {
            // Each attempt starts out uncertain again, so clear the flag upon retrying.
            definitelyNotUpdated.set(false);

            if (startMetadata != null) {
                final DataStoreMetadataUpdateResult metadataResult = updateDataSourceMetadataWithHandle(handle, dataSource, startMetadata, endMetadata);
                if (metadataResult != DataStoreMetadataUpdateResult.SUCCESS) {
                    // Metadata was definitely not updated.
                    transactionStatus.setRollbackOnly();
                    definitelyNotUpdated.set(true);
                    if (metadataResult == DataStoreMetadataUpdateResult.FAILURE) {
                        throw new RuntimeException("Aborting transaction!");
                    }
                    if (metadataResult == DataStoreMetadataUpdateResult.TRY_AGAIN) {
                        throw new RetryTransactionException("Aborting transaction!");
                    }
                }
            }

            if (segmentsToDrop != null && !segmentsToDrop.isEmpty()) {
                final DataStoreMetadataUpdateResult dropResult = dropSegmentsWithHandle(handle, segmentsToDrop, dataSource);
                if (dropResult != DataStoreMetadataUpdateResult.SUCCESS) {
                    // Metadata store was definitely not updated.
                    transactionStatus.setRollbackOnly();
                    definitelyNotUpdated.set(true);
                    if (dropResult == DataStoreMetadataUpdateResult.FAILURE) {
                        throw new RuntimeException("Aborting transaction!");
                    }
                    if (dropResult == DataStoreMetadataUpdateResult.TRY_AGAIN) {
                        throw new RetryTransactionException("Aborting transaction!");
                    }
                }
            }

            return SegmentPublishResult.ok(ImmutableSet.copyOf(announceHistoricalSegmentBatch(handle, segments, usedSegments)));
        }, 3, getSqlMetadataMaxRetry());
    } catch (CallbackFailedException e) {
        if (!definitelyNotUpdated.get()) {
            // Must throw exception if we are not sure if we updated or not.
            throw e;
        }
        return SegmentPublishResult.fail(e.getMessage());
    }
}
Also used : ResultSet(java.sql.ResultSet) ImmutableSet(com.google.common.collect.ImmutableSet) Set(java.util.Set) HashSet(java.util.HashSet) TransactionStatus(org.skife.jdbi.v2.TransactionStatus) DataSegment(org.apache.druid.timeline.DataSegment) JsonProcessingException(com.fasterxml.jackson.core.JsonProcessingException) IOException(java.io.IOException) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) Handle(org.skife.jdbi.v2.Handle) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) SegmentPublishResult(org.apache.druid.indexing.overlord.SegmentPublishResult) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) HashSet(java.util.HashSet)

Aggregations

TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder)28 DataSegment (org.apache.druid.timeline.DataSegment)23 Interval (org.joda.time.Interval)18 ArrayList (java.util.ArrayList)14 Test (org.junit.Test)12 List (java.util.List)11 TableDataSource (org.apache.druid.query.TableDataSource)10 Map (java.util.Map)9 IOException (java.io.IOException)8 HashSet (java.util.HashSet)8 CountDownLatch (java.util.concurrent.CountDownLatch)8 VersionedIntervalTimeline (org.apache.druid.timeline.VersionedIntervalTimeline)8 ImmutableList (com.google.common.collect.ImmutableList)7 ISE (org.apache.druid.java.util.common.ISE)7 PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk)7 Preconditions (com.google.common.base.Preconditions)6 Collections (java.util.Collections)6 HashMap (java.util.HashMap)6 Logger (org.apache.druid.java.util.common.logger.Logger)6 File (java.io.File)5