
Example 16 with TimelineObjectHolder

Use of io.druid.timeline.TimelineObjectHolder in project druid by druid-io.

From class AppendTask, method merge.

@Override
public File merge(final TaskToolbox toolbox, final Map<DataSegment, File> segments, final File outDir) throws Exception {
    VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : segments.keySet()) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final Iterable<SegmentToMergeHolder> segmentsToMerge = Iterables.concat(
        Iterables.transform(
            timeline.lookup(new Interval("1000-01-01/3000-01-01")),
            new Function<TimelineObjectHolder<String, DataSegment>, Iterable<SegmentToMergeHolder>>() {

                @Override
                public Iterable<SegmentToMergeHolder> apply(final TimelineObjectHolder<String, DataSegment> input) {
                    return Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, SegmentToMergeHolder>() {

                        @Nullable
                        @Override
                        public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput) {
                            DataSegment segment = chunkInput.getObject();
                            return new SegmentToMergeHolder(
                                segment,
                                input.getInterval(),
                                Preconditions.checkNotNull(segments.get(segment), "File for segment %s", segment.getIdentifier())
                            );
                        }
                    });
                }
            }
        )
    );
    List<IndexableAdapter> adapters = Lists.newArrayList();
    for (final SegmentToMergeHolder holder : segmentsToMerge) {
        adapters.add(new RowboatFilteringIndexAdapter(new QueryableIndexIndexableAdapter(toolbox.getIndexIO().loadIndex(holder.getFile())), new Predicate<Rowboat>() {

            @Override
            public boolean apply(Rowboat input) {
                return holder.getInterval().contains(input.getTimestamp());
            }
        }));
    }
    IndexMerger indexMerger = buildV9Directly ? toolbox.getIndexMergerV9() : toolbox.getIndexMerger();
    return indexMerger.append(adapters, aggregators == null ? null : aggregators.toArray(new AggregatorFactory[aggregators.size()]), outDir, indexSpec);
}
Also used: IndexMerger(io.druid.segment.IndexMerger) RowboatFilteringIndexAdapter(io.druid.segment.RowboatFilteringIndexAdapter) DataSegment(io.druid.timeline.DataSegment) Predicate(com.google.common.base.Predicate) Function(com.google.common.base.Function) QueryableIndexIndexableAdapter(io.druid.segment.QueryableIndexIndexableAdapter) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) IndexableAdapter(io.druid.segment.IndexableAdapter) QueryableIndexIndexableAdapter(io.druid.segment.QueryableIndexIndexableAdapter) PartitionChunk(io.druid.timeline.partition.PartitionChunk) Nullable(javax.annotation.Nullable) Rowboat(io.druid.segment.Rowboat) Interval(org.joda.time.Interval)
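
The code above is dense because of the nested Guava Function objects, but the underlying idea is simple: register each segment in a VersionedIntervalTimeline, look up an interval, and flatten each TimelineObjectHolder into the DataSegments held by its PartitionChunks. Below is a minimal sketch of that traversal with plain loops, using only the timeline calls already shown above and the same imports listed under "Also used"; the helper name segmentsOverlapping is made up for illustration.

// Hypothetical helper: collect every DataSegment visible in the timeline for an interval,
// walking the same structures AppendTask.merge() walks with Guava transforms.
public List<DataSegment> segmentsOverlapping(Iterable<DataSegment> segments, Interval interval) {
    VersionedIntervalTimeline<String, DataSegment> timeline =
        new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : segments) {
        // Each segment is registered under its interval and version; its shard spec
        // determines the chunk it occupies within that interval.
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    List<DataSegment> result = Lists.newArrayList();
    // lookup() returns one TimelineObjectHolder per visible (interval, version) slice.
    for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(interval)) {
        // A holder's object is the set of PartitionChunks (one per shard) for that slice.
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            result.add(chunk.getObject());
        }
    }
    return result;
}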

Example 17 with TimelineObjectHolder

Use of io.druid.timeline.TimelineObjectHolder in project druid by druid-io.

From class IndexerSQLMetadataStorageCoordinator, method allocatePendingSegment.

@Override
public SegmentIdentifier allocatePendingSegment(final String dataSource, final String sequenceName, final String previousSegmentId, final Interval interval, final String maxVersion) throws IOException {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(sequenceName, "sequenceName");
    Preconditions.checkNotNull(interval, "interval");
    Preconditions.checkNotNull(maxVersion, "maxVersion");
    final String previousSegmentIdNotNull = previousSegmentId == null ? "" : previousSegmentId;
    return connector.retryTransaction(new TransactionCallback<SegmentIdentifier>() {

        @Override
        public SegmentIdentifier inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception {
            final List<byte[]> existingBytes = handle.createQuery(
                String.format(
                    "SELECT payload FROM %s WHERE "
                        + "dataSource = :dataSource AND "
                        + "sequence_name = :sequence_name AND "
                        + "sequence_prev_id = :sequence_prev_id",
                    dbTables.getPendingSegmentsTable()
                )
            )
                .bind("dataSource", dataSource)
                .bind("sequence_name", sequenceName)
                .bind("sequence_prev_id", previousSegmentIdNotNull)
                .map(ByteArrayMapper.FIRST)
                .list();
            if (!existingBytes.isEmpty()) {
                final SegmentIdentifier existingIdentifier = jsonMapper.readValue(Iterables.getOnlyElement(existingBytes), SegmentIdentifier.class);
                if (existingIdentifier.getInterval().getStartMillis() == interval.getStartMillis() && existingIdentifier.getInterval().getEndMillis() == interval.getEndMillis()) {
                    log.info("Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
                    return existingIdentifier;
                } else {
                    log.warn("Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, " + "does not match requested interval[%s]", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull, interval);
                    return null;
                }
            }
            // Make up a pending segment based on existing segments and pending segments in the DB. This works
            // assuming that all tasks inserting segments at a particular point in time are going through the
            // allocatePendingSegment flow. This should be assured through some other mechanism (like task locks).
            final SegmentIdentifier newIdentifier;
            final List<TimelineObjectHolder<String, DataSegment>> existingChunks = getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);
            if (existingChunks.size() > 1) {
                // Not possible to expand more than one chunk with a single segment.
                log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: already have [%,d] chunks.", dataSource, interval, maxVersion, existingChunks.size());
                return null;
            } else {
                SegmentIdentifier max = null;
                if (!existingChunks.isEmpty()) {
                    TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
                    for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
                        if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
                            max = SegmentIdentifier.fromDataSegment(existing.getObject());
                        }
                    }
                }
                final List<SegmentIdentifier> pendings = getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
                for (SegmentIdentifier pending : pendings) {
                    if (max == null || pending.getVersion().compareTo(max.getVersion()) > 0 || (pending.getVersion().equals(max.getVersion()) && pending.getShardSpec().getPartitionNum() > max.getShardSpec().getPartitionNum())) {
                        max = pending;
                    }
                }
                if (max == null) {
                    newIdentifier = new SegmentIdentifier(dataSource, interval, maxVersion, new NumberedShardSpec(0, 0));
                } else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
                    log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].", dataSource, interval, maxVersion, max.getIdentifierAsString());
                    return null;
                } else if (max.getShardSpec() instanceof LinearShardSpec) {
                    newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new LinearShardSpec(max.getShardSpec().getPartitionNum() + 1));
                } else if (max.getShardSpec() instanceof NumberedShardSpec) {
                    newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new NumberedShardSpec(max.getShardSpec().getPartitionNum() + 1, ((NumberedShardSpec) max.getShardSpec()).getPartitions()));
                } else {
                    log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: ShardSpec class[%s] used by [%s].", dataSource, interval, maxVersion, max.getShardSpec().getClass(), max.getIdentifierAsString());
                    return null;
                }
            }
            // SELECT -> INSERT can fail due to races; callers must be prepared to retry.
            // Avoiding ON DUPLICATE KEY since it's not portable.
            // Avoiding try/catch since it may cause inadvertent transaction-splitting.
            // UNIQUE key for the row, ensuring sequences do not fork in two directions.
            // Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines
            // have difficulty with large unique keys (see https://github.com/druid-io/druid/issues/2319)
            final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode(
                Hashing.sha1().newHasher()
                    .putBytes(StringUtils.toUtf8(sequenceName))
                    .putByte((byte) 0xff)
                    .putBytes(StringUtils.toUtf8(previousSegmentIdNotNull))
                    .hash()
                    .asBytes()
            );
            handle.createStatement(
                String.format(
                    "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, sequence_name, sequence_prev_id, sequence_name_prev_id_sha1, payload) "
                        + "VALUES (:id, :dataSource, :created_date, :start, :end, :sequence_name, :sequence_prev_id, :sequence_name_prev_id_sha1, :payload)",
                    dbTables.getPendingSegmentsTable(),
                    connector.getQuoteString()
                )
            )
                .bind("id", newIdentifier.getIdentifierAsString())
                .bind("dataSource", dataSource)
                .bind("created_date", new DateTime().toString())
                .bind("start", interval.getStart().toString())
                .bind("end", interval.getEnd().toString())
                .bind("sequence_name", sequenceName)
                .bind("sequence_prev_id", previousSegmentIdNotNull)
                .bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1)
                .bind("payload", jsonMapper.writeValueAsBytes(newIdentifier))
                .execute();
            log.info("Allocated pending segment [%s] for sequence[%s] (previous = [%s]) in DB", newIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
            return newIdentifier;
        }
    }, ALLOCATE_SEGMENT_QUIET_TRIES, SQLMetadataConnector.DEFAULT_MAX_TRIES);
}
Also used: SegmentIdentifier(io.druid.segment.realtime.appenderator.SegmentIdentifier) LinearShardSpec(io.druid.timeline.partition.LinearShardSpec) TransactionStatus(org.skife.jdbi.v2.TransactionStatus) DataSegment(io.druid.timeline.DataSegment) SQLException(java.sql.SQLException) IOException(java.io.IOException) CallbackFailedException(org.skife.jdbi.v2.exceptions.CallbackFailedException) DateTime(org.joda.time.DateTime) Handle(org.skife.jdbi.v2.Handle) TimelineObjectHolder(io.druid.timeline.TimelineObjectHolder) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) PartitionChunk(io.druid.timeline.partition.PartitionChunk) NumberedShardSpec(io.druid.timeline.partition.NumberedShardSpec)
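
Once the highest existing identifier (max above) is known, the actual allocation decision is small: reuse max's interval and version and bump the partition number, start at NumberedShardSpec(0, 0) when nothing exists yet, or give up on a conflict. Below is a sketch isolating just that branch logic; the helper name nextIdentifier is hypothetical, and it assumes the same classes listed under "Also used" above.

// Hypothetical helper isolating the shard-spec decision made inside allocatePendingSegment().
// Returns null in the same conflict cases that the method above logs and gives up on.
private SegmentIdentifier nextIdentifier(SegmentIdentifier max, String dataSource, Interval interval, String maxVersion) {
    if (max == null) {
        // Nothing exists or is pending for this interval: start a fresh numbered shard.
        return new SegmentIdentifier(dataSource, interval, maxVersion, new NumberedShardSpec(0, 0));
    }
    if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
        // Conflicting interval, or a newer version is already present.
        return null;
    }
    if (max.getShardSpec() instanceof LinearShardSpec) {
        // Linear shards just take the next partition number.
        return new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(),
            new LinearShardSpec(max.getShardSpec().getPartitionNum() + 1));
    }
    if (max.getShardSpec() instanceof NumberedShardSpec) {
        // Numbered shards take the next partition number and keep the declared partition count.
        return new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(),
            new NumberedShardSpec(max.getShardSpec().getPartitionNum() + 1, ((NumberedShardSpec) max.getShardSpec()).getPartitions()));
    }
    // Any other ShardSpec implementation cannot be extended one segment at a time.
    return null;
}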

Aggregations

TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder): 17 uses
Interval (org.joda.time.Interval): 15 uses
DataSegment (io.druid.timeline.DataSegment): 11 uses
PartitionChunk (io.druid.timeline.partition.PartitionChunk): 8 uses
TableDataSource (io.druid.query.TableDataSource): 7 uses
VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline): 7 uses
Function (com.google.common.base.Function): 6 uses
Map (java.util.Map): 6 uses
IOException (java.io.IOException): 5 uses
List (java.util.List): 5 uses
ImmutableList (com.google.common.collect.ImmutableList): 4 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 4 uses
TimelineLookup (io.druid.timeline.TimelineLookup): 4 uses
CountDownLatch (java.util.concurrent.CountDownLatch): 4 uses
Test (org.junit.Test): 4 uses
ServerSelector (io.druid.client.selector.ServerSelector): 3 uses
ShardSpec (io.druid.timeline.partition.ShardSpec): 3 uses
DateTime (org.joda.time.DateTime): 3 uses
Pair (io.druid.java.util.common.Pair): 2 uses
NoneShardSpec (io.druid.timeline.partition.NoneShardSpec): 2 uses