
Example 6 with PartitionChunk

Use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.

The class SchemalessIndexTest, method makeAppendedMMappedIndex.

private static QueryableIndex makeAppendedMMappedIndex(Iterable<Pair<String, AggregatorFactory[]>> files, final List<Interval> intervals) {
    try {
        File tmpFile = File.createTempFile("yay", "boo");
        tmpFile.delete();
        File mergedFile = new File(tmpFile, "merged");
        mergedFile.mkdirs();
        mergedFile.deleteOnExit();
        List<File> filesToMap = makeFilesToMap(tmpFile, files);
        VersionedIntervalTimeline<Integer, File> timeline = new VersionedIntervalTimeline<Integer, File>(Ordering.natural().nullsFirst());
        ShardSpec noneShardSpec = NoneShardSpec.instance();
        for (int i = 0; i < intervals.size(); i++) {
            timeline.add(intervals.get(i), i, noneShardSpec.createChunk(filesToMap.get(i)));
        }
        // TimelineObjectHolder is actually an iterable of iterable of indexable adapters
        final List<IndexableAdapter> adapters = Lists.newArrayList(Iterables.concat(
            Iterables.transform(timeline.lookup(new Interval("1000-01-01/3000-01-01")), new Function<TimelineObjectHolder<Integer, File>, Iterable<IndexableAdapter>>() {

            @Override
            public Iterable<IndexableAdapter> apply(final TimelineObjectHolder<Integer, File> timelineObjectHolder) {
                // Each chunk can be used to build the actual IndexableAdapter
                return Iterables.transform(timelineObjectHolder.getObject(), new Function<PartitionChunk<File>, IndexableAdapter>() {

                    @Override
                    public IndexableAdapter apply(PartitionChunk<File> chunk) {
                        try {
                            return new RowboatFilteringIndexAdapter(new QueryableIndexIndexableAdapter(INDEX_IO.loadIndex(chunk.getObject())), new Predicate<Rowboat>() {

                                @Override
                                public boolean apply(Rowboat input) {
                                    return timelineObjectHolder.getInterval().contains(input.getTimestamp());
                                }
                            });
                        } catch (IOException e) {
                            throw Throwables.propagate(e);
                        }
                    }
                });
            }
        })));
        return INDEX_IO.loadIndex(INDEX_MERGER.append(adapters, null, mergedFile, indexSpec));
    } catch (IOException e) {
        throw Throwables.propagate(e);
    }
}
Also used: IOException (java.io.IOException), ShardSpec (io.druid.timeline.partition.ShardSpec), NoneShardSpec (io.druid.timeline.partition.NoneShardSpec), Function (com.google.common.base.Function), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline), PartitionChunk (io.druid.timeline.partition.PartitionChunk), File (java.io.File), Interval (org.joda.time.Interval)
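The core pattern in this example can be isolated: build a VersionedIntervalTimeline, register one PartitionChunk per interval through a ShardSpec, then walk whatever lookup() returns. The following is a minimal, self-contained sketch of that flow under simplified assumptions: a String payload and made-up intervals instead of the mmapped index files the test actually maps.

import com.google.common.collect.Ordering;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.NoneShardSpec;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

public class TimelineChunkSketch {
    public static void main(String[] args) {
        // NoneShardSpec produces a single chunk per interval (no partitioning).
        VersionedIntervalTimeline<Integer, String> timeline =
            new VersionedIntervalTimeline<>(Ordering.<Integer>natural().nullsFirst());
        timeline.add(new Interval("2017-01-01/2017-01-02"), 0, NoneShardSpec.instance().createChunk("payload-0"));
        timeline.add(new Interval("2017-01-02/2017-01-03"), 1, NoneShardSpec.instance().createChunk("payload-1"));

        // A lookup over a covering interval yields one holder per visible interval/version;
        // each holder exposes its PartitionChunks via getObject().
        for (TimelineObjectHolder<Integer, String> holder : timeline.lookup(new Interval("1000-01-01/3000-01-01"))) {
            for (PartitionChunk<String> chunk : holder.getObject()) {
                System.out.println(holder.getInterval() + " -> " + chunk.getObject());
            }
        }
    }
}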

Example 7 with PartitionChunk

Use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.

The class ServerManager, method getQueryRunnerForIntervals.

@Override
public <T> QueryRunner<T> getQueryRunnerForIntervals(Query<T> query, Iterable<Interval> intervals) {
    final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
    if (factory == null) {
        throw new ISE("Unknown query type[%s].", query.getClass());
    }
    final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
    final Function<Query<T>, ServiceMetricEvent.Builder> builderFn = getBuilderFn(toolChest);
    final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);
    DataSource dataSource = query.getDataSource();
    if (!(dataSource instanceof TableDataSource)) {
        throw new UnsupportedOperationException("data source type '" + dataSource.getClass().getName() + "' unsupported");
    }
    String dataSourceName = getDataSourceName(dataSource);
    final VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline = dataSources.get(dataSourceName);
    if (timeline == null) {
        return new NoopQueryRunner<T>();
    }
    FunctionalIterable<QueryRunner<T>> queryRunners = FunctionalIterable.create(intervals).transformCat(new Function<Interval, Iterable<TimelineObjectHolder<String, ReferenceCountingSegment>>>() {

        @Override
        public Iterable<TimelineObjectHolder<String, ReferenceCountingSegment>> apply(Interval input) {
            return timeline.lookup(input);
        }
    }).transformCat(new Function<TimelineObjectHolder<String, ReferenceCountingSegment>, Iterable<QueryRunner<T>>>() {

        @Override
        public Iterable<QueryRunner<T>> apply(@Nullable final TimelineObjectHolder<String, ReferenceCountingSegment> holder) {
            if (holder == null) {
                return null;
            }
            return FunctionalIterable.create(holder.getObject()).transform(new Function<PartitionChunk<ReferenceCountingSegment>, QueryRunner<T>>() {

                @Override
                public QueryRunner<T> apply(PartitionChunk<ReferenceCountingSegment> input) {
                    return buildAndDecorateQueryRunner(
                        factory,
                        toolChest,
                        input.getObject(),
                        new SegmentDescriptor(holder.getInterval(), holder.getVersion(), input.getChunkNumber()),
                        builderFn,
                        cpuTimeAccumulator
                    );
                }
            });
        }
    });
    return CPUTimeMetricQueryRunner.safeBuild(new FinalizeResultsQueryRunner<T>(toolChest.mergeResults(factory.mergeRunners(exec, queryRunners)), toolChest), builderFn, emitter, cpuTimeAccumulator, true);
}
Also used: ReferenceCountingSegment (io.druid.segment.ReferenceCountingSegment), Query (io.druid.query.Query), FunctionalIterable (io.druid.java.util.common.guava.FunctionalIterable), Function (com.google.common.base.Function), NoopQueryRunner (io.druid.query.NoopQueryRunner), SegmentDescriptor (io.druid.query.SegmentDescriptor), ISE (io.druid.java.util.common.ISE), PartitionChunk (io.druid.timeline.partition.PartitionChunk), MetricsEmittingQueryRunner (io.druid.query.MetricsEmittingQueryRunner), ReportTimelineMissingSegmentQueryRunner (io.druid.query.ReportTimelineMissingSegmentQueryRunner), BySegmentQueryRunner (io.druid.query.BySegmentQueryRunner), SpecificSegmentQueryRunner (io.druid.query.spec.SpecificSegmentQueryRunner), ReferenceCountingSegmentQueryRunner (io.druid.query.ReferenceCountingSegmentQueryRunner), FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner), CPUTimeMetricQueryRunner (io.druid.query.CPUTimeMetricQueryRunner), CachingQueryRunner (io.druid.client.CachingQueryRunner), QueryRunner (io.druid.query.QueryRunner), TableDataSource (io.druid.query.TableDataSource), DataSource (io.druid.query.DataSource), AtomicLong (java.util.concurrent.atomic.AtomicLong), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), Interval (org.joda.time.Interval)
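Stripped of the Guava Function plumbing and runner decoration, the per-chunk fan-out above is three nested loops: requested intervals, timeline holders, then PartitionChunks, with each chunk identified by a SegmentDescriptor built from the holder's interval and version plus the chunk number. A reduced sketch of just that step, assuming a pre-populated timeline; the describeChunks helper is illustrative and not part of ServerManager.

import io.druid.query.SegmentDescriptor;
import io.druid.segment.ReferenceCountingSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

import java.util.ArrayList;
import java.util.List;

public class ChunkDescriptorSketch {
    // Illustrative helper: list every chunk visible for the requested intervals as a SegmentDescriptor,
    // the same (interval, version, chunk number) triple ServerManager hands to its per-segment runner.
    static List<SegmentDescriptor> describeChunks(
        VersionedIntervalTimeline<String, ReferenceCountingSegment> timeline,
        Iterable<Interval> intervals
    ) {
        List<SegmentDescriptor> descriptors = new ArrayList<>();
        for (Interval interval : intervals) {
            for (TimelineObjectHolder<String, ReferenceCountingSegment> holder : timeline.lookup(interval)) {
                for (PartitionChunk<ReferenceCountingSegment> chunk : holder.getObject()) {
                    descriptors.add(new SegmentDescriptor(holder.getInterval(), holder.getVersion(), chunk.getChunkNumber()));
                }
            }
        }
        return descriptors;
    }
}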

Example 8 with PartitionChunk

Use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.

The class AppendTask, method merge.

@Override
public File merge(final TaskToolbox toolbox, final Map<DataSegment, File> segments, final File outDir) throws Exception {
    VersionedIntervalTimeline<String, DataSegment> timeline = new VersionedIntervalTimeline<String, DataSegment>(Ordering.<String>natural().nullsFirst());
    for (DataSegment segment : segments.keySet()) {
        timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
    }
    final Iterable<SegmentToMergeHolder> segmentsToMerge = Iterables.concat(Iterables.transform(timeline.lookup(new Interval("1000-01-01/3000-01-01")), new Function<TimelineObjectHolder<String, DataSegment>, Iterable<SegmentToMergeHolder>>() {

        @Override
        public Iterable<SegmentToMergeHolder> apply(final TimelineObjectHolder<String, DataSegment> input) {
            return Iterables.transform(input.getObject(), new Function<PartitionChunk<DataSegment>, SegmentToMergeHolder>() {

                @Nullable
                @Override
                public SegmentToMergeHolder apply(PartitionChunk<DataSegment> chunkInput) {
                    DataSegment segment = chunkInput.getObject();
                    return new SegmentToMergeHolder(segment, input.getInterval(), Preconditions.checkNotNull(segments.get(segment), "File for segment %s", segment.getIdentifier()));
                }
            });
        }
    }));
    List<IndexableAdapter> adapters = Lists.newArrayList();
    for (final SegmentToMergeHolder holder : segmentsToMerge) {
        adapters.add(new RowboatFilteringIndexAdapter(new QueryableIndexIndexableAdapter(toolbox.getIndexIO().loadIndex(holder.getFile())), new Predicate<Rowboat>() {

            @Override
            public boolean apply(Rowboat input) {
                return holder.getInterval().contains(input.getTimestamp());
            }
        }));
    }
    IndexMerger indexMerger = buildV9Directly ? toolbox.getIndexMergerV9() : toolbox.getIndexMerger();
    return indexMerger.append(adapters, aggregators == null ? null : aggregators.toArray(new AggregatorFactory[aggregators.size()]), outDir, indexSpec);
}
Also used: IndexMerger (io.druid.segment.IndexMerger), RowboatFilteringIndexAdapter (io.druid.segment.RowboatFilteringIndexAdapter), DataSegment (io.druid.timeline.DataSegment), Predicate (com.google.common.base.Predicate), Function (com.google.common.base.Function), QueryableIndexIndexableAdapter (io.druid.segment.QueryableIndexIndexableAdapter), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline), IndexableAdapter (io.druid.segment.IndexableAdapter), PartitionChunk (io.druid.timeline.partition.PartitionChunk), Nullable (javax.annotation.Nullable), Rowboat (io.druid.segment.Rowboat), Interval (org.joda.time.Interval)
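The same overshadow-aware flattening can be written as plain loops instead of nested Iterables.transform calls. A sketch under the assumption that the caller holds the same segments map; the collectVisibleSegments helper and its Pair return type are illustrative, not part of AppendTask.

import com.google.common.collect.Ordering;
import io.druid.java.util.common.Pair;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.VersionedIntervalTimeline;
import io.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class AppendFlattenSketch {
    // Illustrative helper: register every segment under its ShardSpec's chunk, then read back only
    // the (segment, interval) pieces that survive version overshadowing in the timeline.
    static List<Pair<DataSegment, Interval>> collectVisibleSegments(Map<DataSegment, File> segments) {
        VersionedIntervalTimeline<String, DataSegment> timeline =
            new VersionedIntervalTimeline<>(Ordering.<String>natural().nullsFirst());
        for (DataSegment segment : segments.keySet()) {
            timeline.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(segment));
        }
        List<Pair<DataSegment, Interval>> visible = new ArrayList<>();
        for (TimelineObjectHolder<String, DataSegment> holder : timeline.lookup(new Interval("1000-01-01/3000-01-01"))) {
            for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                // The holder's interval (not the segment's) is what the merge filter checks rows against.
                visible.add(new Pair<>(chunk.getObject(), holder.getInterval()));
            }
        }
        return visible;
    }
}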

Example 9 with PartitionChunk

Use of io.druid.timeline.partition.PartitionChunk in project druid by druid-io.

The class IndexerSQLMetadataStorageCoordinator, method allocatePendingSegment.

@Override
public SegmentIdentifier allocatePendingSegment(final String dataSource, final String sequenceName, final String previousSegmentId, final Interval interval, final String maxVersion) throws IOException {
    Preconditions.checkNotNull(dataSource, "dataSource");
    Preconditions.checkNotNull(sequenceName, "sequenceName");
    Preconditions.checkNotNull(interval, "interval");
    Preconditions.checkNotNull(maxVersion, "maxVersion");
    final String previousSegmentIdNotNull = previousSegmentId == null ? "" : previousSegmentId;
    return connector.retryTransaction(new TransactionCallback<SegmentIdentifier>() {

        @Override
        public SegmentIdentifier inTransaction(Handle handle, TransactionStatus transactionStatus) throws Exception {
            final List<byte[]> existingBytes = handle
                .createQuery(
                    String.format(
                        "SELECT payload FROM %s WHERE "
                        + "dataSource = :dataSource AND "
                        + "sequence_name = :sequence_name AND "
                        + "sequence_prev_id = :sequence_prev_id",
                        dbTables.getPendingSegmentsTable()
                    )
                )
                .bind("dataSource", dataSource)
                .bind("sequence_name", sequenceName)
                .bind("sequence_prev_id", previousSegmentIdNotNull)
                .map(ByteArrayMapper.FIRST)
                .list();
            if (!existingBytes.isEmpty()) {
                final SegmentIdentifier existingIdentifier = jsonMapper.readValue(Iterables.getOnlyElement(existingBytes), SegmentIdentifier.class);
                if (existingIdentifier.getInterval().getStartMillis() == interval.getStartMillis() && existingIdentifier.getInterval().getEndMillis() == interval.getEndMillis()) {
                    log.info("Found existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
                    return existingIdentifier;
                } else {
                    log.warn("Cannot use existing pending segment [%s] for sequence[%s] (previous = [%s]) in DB, " + "does not match requested interval[%s]", existingIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull, interval);
                    return null;
                }
            }
            // Make up a pending segment based on existing segments and pending segments in the DB. This works
            // assuming that all tasks inserting segments at a particular point in time are going through the
            // allocatePendingSegment flow. This should be assured through some other mechanism (like task locks).
            final SegmentIdentifier newIdentifier;
            final List<TimelineObjectHolder<String, DataSegment>> existingChunks = getTimelineForIntervalsWithHandle(handle, dataSource, ImmutableList.of(interval)).lookup(interval);
            if (existingChunks.size() > 1) {
                // Not possible to expand more than one chunk with a single segment.
                log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: already have [%,d] chunks.", dataSource, interval, maxVersion, existingChunks.size());
                return null;
            } else {
                SegmentIdentifier max = null;
                if (!existingChunks.isEmpty()) {
                    TimelineObjectHolder<String, DataSegment> existingHolder = Iterables.getOnlyElement(existingChunks);
                    for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
                        if (max == null || max.getShardSpec().getPartitionNum() < existing.getObject().getShardSpec().getPartitionNum()) {
                            max = SegmentIdentifier.fromDataSegment(existing.getObject());
                        }
                    }
                }
                final List<SegmentIdentifier> pendings = getPendingSegmentsForIntervalWithHandle(handle, dataSource, interval);
                for (SegmentIdentifier pending : pendings) {
                    if (max == null || pending.getVersion().compareTo(max.getVersion()) > 0 || (pending.getVersion().equals(max.getVersion()) && pending.getShardSpec().getPartitionNum() > max.getShardSpec().getPartitionNum())) {
                        max = pending;
                    }
                }
                if (max == null) {
                    newIdentifier = new SegmentIdentifier(dataSource, interval, maxVersion, new NumberedShardSpec(0, 0));
                } else if (!max.getInterval().equals(interval) || max.getVersion().compareTo(maxVersion) > 0) {
                    log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: conflicting segment[%s].", dataSource, interval, maxVersion, max.getIdentifierAsString());
                    return null;
                } else if (max.getShardSpec() instanceof LinearShardSpec) {
                    newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new LinearShardSpec(max.getShardSpec().getPartitionNum() + 1));
                } else if (max.getShardSpec() instanceof NumberedShardSpec) {
                    newIdentifier = new SegmentIdentifier(dataSource, max.getInterval(), max.getVersion(), new NumberedShardSpec(max.getShardSpec().getPartitionNum() + 1, ((NumberedShardSpec) max.getShardSpec()).getPartitions()));
                } else {
                    log.warn("Cannot allocate new segment for dataSource[%s], interval[%s], maxVersion[%s]: ShardSpec class[%s] used by [%s].", dataSource, interval, maxVersion, max.getShardSpec().getClass(), max.getIdentifierAsString());
                    return null;
                }
            }
            // SELECT -> INSERT can fail due to races; callers must be prepared to retry.
            // Avoiding ON DUPLICATE KEY since it's not portable.
            // Avoiding try/catch since it may cause inadvertent transaction-splitting.
            // UNIQUE key for the row, ensuring sequences do not fork in two directions.
            // Using a single column instead of (sequence_name, sequence_prev_id) as some MySQL storage engines
            // have difficulty with large unique keys (see https://github.com/druid-io/druid/issues/2319)
            final String sequenceNamePrevIdSha1 = BaseEncoding.base16().encode(Hashing.sha1().newHasher().putBytes(StringUtils.toUtf8(sequenceName)).putByte((byte) 0xff).putBytes(StringUtils.toUtf8(previousSegmentIdNotNull)).hash().asBytes());
            handle.createStatement(
                String.format(
                    "INSERT INTO %1$s (id, dataSource, created_date, start, %2$send%2$s, "
                    + "sequence_name, sequence_prev_id, sequence_name_prev_id_sha1, payload) "
                    + "VALUES (:id, :dataSource, :created_date, :start, :end, "
                    + ":sequence_name, :sequence_prev_id, :sequence_name_prev_id_sha1, :payload)",
                    dbTables.getPendingSegmentsTable(),
                    connector.getQuoteString()
                )
            )
                .bind("id", newIdentifier.getIdentifierAsString())
                .bind("dataSource", dataSource)
                .bind("created_date", new DateTime().toString())
                .bind("start", interval.getStart().toString())
                .bind("end", interval.getEnd().toString())
                .bind("sequence_name", sequenceName)
                .bind("sequence_prev_id", previousSegmentIdNotNull)
                .bind("sequence_name_prev_id_sha1", sequenceNamePrevIdSha1)
                .bind("payload", jsonMapper.writeValueAsBytes(newIdentifier))
                .execute();
            log.info("Allocated pending segment [%s] for sequence[%s] (previous = [%s]) in DB", newIdentifier.getIdentifierAsString(), sequenceName, previousSegmentIdNotNull);
            return newIdentifier;
        }
    }, ALLOCATE_SEGMENT_QUIET_TRIES, SQLMetadataConnector.DEFAULT_MAX_TRIES);
}
Also used: SegmentIdentifier (io.druid.segment.realtime.appenderator.SegmentIdentifier), LinearShardSpec (io.druid.timeline.partition.LinearShardSpec), TransactionStatus (org.skife.jdbi.v2.TransactionStatus), DataSegment (io.druid.timeline.DataSegment), SQLException (java.sql.SQLException), IOException (java.io.IOException), CallbackFailedException (org.skife.jdbi.v2.exceptions.CallbackFailedException), DateTime (org.joda.time.DateTime), Handle (org.skife.jdbi.v2.Handle), TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder), List (java.util.List), ArrayList (java.util.ArrayList), ImmutableList (com.google.common.collect.ImmutableList), PartitionChunk (io.druid.timeline.partition.PartitionChunk), NumberedShardSpec (io.druid.timeline.partition.NumberedShardSpec)
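The PartitionChunk loop buried in this transaction has one job: find the highest partition number already committed for the interval, so the new LinearShardSpec or NumberedShardSpec can take the next slot. Pulled out on its own as a sketch; the maxCommittedIdentifier helper and its standalone signature are illustrative, using only APIs that appear in the method above.

import io.druid.segment.realtime.appenderator.SegmentIdentifier;
import io.druid.timeline.DataSegment;
import io.druid.timeline.TimelineObjectHolder;
import io.druid.timeline.partition.PartitionChunk;

public class MaxPartitionSketch {
    // Illustrative helper: scan the chunks of the single existing TimelineObjectHolder and keep the
    // identifier with the largest partition number; the caller then compares it against pending segments
    // and allocates partitionNum + 1 for the new segment.
    static SegmentIdentifier maxCommittedIdentifier(TimelineObjectHolder<String, DataSegment> existingHolder) {
        SegmentIdentifier max = null;
        for (PartitionChunk<DataSegment> existing : existingHolder.getObject()) {
            int partitionNum = existing.getObject().getShardSpec().getPartitionNum();
            if (max == null || max.getShardSpec().getPartitionNum() < partitionNum) {
                max = SegmentIdentifier.fromDataSegment(existing.getObject());
            }
        }
        return max;
    }
}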

Aggregations

PartitionChunk (io.druid.timeline.partition.PartitionChunk): 9 usages
TimelineObjectHolder (io.druid.timeline.TimelineObjectHolder): 7 usages
Function (com.google.common.base.Function): 6 usages
Interval (org.joda.time.Interval): 5 usages
IOException (java.io.IOException): 4 usages
FunctionalIterable (io.druid.java.util.common.guava.FunctionalIterable): 3 usages
Query (io.druid.query.Query): 3 usages
QueryRunner (io.druid.query.QueryRunner): 3 usages
SegmentDescriptor (io.druid.query.SegmentDescriptor): 3 usages
VersionedIntervalTimeline (io.druid.timeline.VersionedIntervalTimeline): 3 usages
ShardSpec (io.druid.timeline.partition.ShardSpec): 3 usages
CachingQueryRunner (io.druid.client.CachingQueryRunner): 2 usages
BySegmentQueryRunner (io.druid.query.BySegmentQueryRunner): 2 usages
CPUTimeMetricQueryRunner (io.druid.query.CPUTimeMetricQueryRunner): 2 usages
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner): 2 usages
MetricsEmittingQueryRunner (io.druid.query.MetricsEmittingQueryRunner): 2 usages
NoopQueryRunner (io.druid.query.NoopQueryRunner): 2 usages
ReferenceCountingSegmentQueryRunner (io.druid.query.ReferenceCountingSegmentQueryRunner): 2 usages
DataSegment (io.druid.timeline.DataSegment): 2 usages
File (java.io.File): 2 usages