
Example 1 with PartitionChunk

Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.

From the class CompactSegmentsTest, the method assertLastSegmentNotCompacted:

private void assertLastSegmentNotCompacted(CompactSegments compactSegments) {
    // Segments of the latest interval should not be compacted
    for (int i = 0; i < 3; i++) {
        final String dataSource = DATA_SOURCE_PREFIX + i;
        final Interval interval = Intervals.of("2017-01-09T12:00:00/2017-01-10");
        List<TimelineObjectHolder<String, DataSegment>> holders = dataSources.get(dataSource).lookup(interval);
        Assert.assertEquals(1, holders.size());
        for (TimelineObjectHolder<String, DataSegment> holder : holders) {
            List<PartitionChunk<DataSegment>> chunks = Lists.newArrayList(holder.getObject());
            Assert.assertEquals(4, chunks.size());
            for (PartitionChunk<DataSegment> chunk : chunks) {
                DataSegment segment = chunk.getObject();
                Assert.assertEquals(interval, segment.getInterval());
                Assert.assertEquals("version", segment.getVersion());
            }
        }
    }
    // Emulate a realtime dataSource by appending more data
    final String dataSource = DATA_SOURCE_PREFIX + 0;
    addMoreData(dataSource, 9);
    CoordinatorStats stats = doCompactSegments(compactSegments);
    Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
    addMoreData(dataSource, 10);
    stats = doCompactSegments(compactSegments);
    Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
}
Also used : CoordinatorStats(org.apache.druid.server.coordinator.CoordinatorStats) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval)
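
The loops above follow the standard timeline traversal pattern: look up an interval, walk each TimelineObjectHolder, and unwrap each PartitionChunk to reach the segment. A minimal, self-contained sketch of just that pattern, assuming an already-populated timeline (the class name, method name, and interval literal are illustrative, not taken from the test):

import java.util.List;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.joda.time.Interval;

public class TimelineTraversalSketch {
    // Prints every segment visible in the timeline for the given interval.
    static void printVisibleSegments(VersionedIntervalTimeline<String, DataSegment> timeline) {
        final Interval interval = Intervals.of("2017-01-09T12:00:00/2017-01-10");
        // lookup() returns one holder per contiguous (interval, version) slice.
        final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(interval);
        for (TimelineObjectHolder<String, DataSegment> holder : holders) {
            // getObject() yields the PartitionHolder, iterable as PartitionChunks.
            for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
                DataSegment segment = chunk.getObject();
                System.out.println(segment.getId() + " (version " + holder.getVersion() + ")");
            }
        }
    }
}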

Example 2 with PartitionChunk

Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.

From the class VersionedIntervalTimeline, the method isOvershadowed:

public boolean isOvershadowed(Interval interval, VersionType version, ObjectType object) {
    lock.readLock().lock();
    try {
        TimelineEntry entry = completePartitionsTimeline.get(interval);
        if (entry != null) {
            final int majorVersionCompare = versionComparator.compare(version, entry.getVersion());
            if (majorVersionCompare == 0) {
                for (PartitionChunk<ObjectType> chunk : entry.partitionHolder) {
                    if (chunk.getObject().overshadows(object)) {
                        return true;
                    }
                }
                return false;
            } else {
                return majorVersionCompare < 0;
            }
        }
        Interval lower = completePartitionsTimeline.floorKey(new Interval(interval.getStart(), DateTimes.MAX));
        if (lower == null || !lower.overlaps(interval)) {
            return false;
        }
        Interval prev = null;
        Interval curr = lower;
        do {
            // Stop when there are no further keys or the next interval is
            // discontinuous with the previous one.
            if (curr == null || (prev != null && curr.getStartMillis() > prev.getEndMillis())) {
                return false;
            }
            final TimelineEntry timelineEntry = completePartitionsTimeline.get(curr);
            final int versionCompare = versionComparator.compare(version, timelineEntry.getVersion());
            // Overshadowing requires the entry's version to be the same or higher;
            // a strictly lower entry version cannot overshadow on this interval.
            if (versionCompare > 0) {
                return false;
            } else if (versionCompare == 0) {
                // Intentionally use the Iterators API instead of the stream API for performance.
                // noinspection ConstantConditions
                final boolean nonOvershadowedObject = Iterators.all(timelineEntry.partitionHolder.iterator(), chunk -> !chunk.getObject().overshadows(object));
                if (nonOvershadowedObject) {
                    return false;
                }
            }
            prev = curr;
            curr = completePartitionsTimeline.higherKey(curr);
        } while (interval.getEndMillis() > prev.getEndMillis());
        return true;
    } finally {
        lock.readLock().unlock();
    }
}
Also used : Comparators(org.apache.druid.java.util.common.guava.Comparators) CollectionUtils(org.apache.druid.utils.CollectionUtils) HashMap(java.util.HashMap) ReentrantReadWriteLock(java.util.concurrent.locks.ReentrantReadWriteLock) Iterators(com.google.common.collect.Iterators) ArrayList(java.util.ArrayList) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) FluentIterable(com.google.common.collect.FluentIterable) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Map(java.util.Map) UOE(org.apache.druid.java.util.common.UOE) StreamSupport(java.util.stream.StreamSupport) Nullable(javax.annotation.Nullable) DateTimes(org.apache.druid.java.util.common.DateTimes) IdentityHashMap(java.util.IdentityHashMap) Iterator(java.util.Iterator) GuardedBy(com.google.errorprone.annotations.concurrent.GuardedBy) Collection(java.util.Collection) Set(java.util.Set) NavigableMap(java.util.NavigableMap) Collectors(java.util.stream.Collectors) Objects(java.util.Objects) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) TreeMap(java.util.TreeMap) Entry(java.util.Map.Entry) Preconditions(com.google.common.base.Preconditions) VisibleForTesting(com.google.common.annotations.VisibleForTesting) Comparator(java.util.Comparator)
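
For context, a hedged sketch of how a caller might consult this method; the helper name is invented, and it assumes DataSegment satisfies the Overshadowable contract that overshadows() relies on:

import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.VersionedIntervalTimeline;

public class OvershadowCheckSketch {
    // Returns true when the candidate would be invisible: its interval is fully
    // covered by strictly higher-version entries, or same-version objects in the
    // timeline overshadow it.
    static boolean wouldBeHidden(VersionedIntervalTimeline<String, DataSegment> timeline, DataSegment candidate) {
        return timeline.isOvershadowed(candidate.getInterval(), candidate.getVersion(), candidate);
    }
}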

Example 3 with PartitionChunk

Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.

From the class VersionedIntervalTimelineTestBase, the method assertValues:

static void assertValues(List<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expected, List<TimelineObjectHolder<String, OvershadowableInteger>> actual) {
    Assert.assertEquals("Sizes did not match.", expected.size(), actual.size());
    Iterator<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expectedIter = expected.iterator();
    Iterator<TimelineObjectHolder<String, OvershadowableInteger>> actualIter = actual.iterator();
    while (expectedIter.hasNext()) {
        Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>> pair = expectedIter.next();
        TimelineObjectHolder<String, OvershadowableInteger> holder = actualIter.next();
        Assert.assertEquals(pair.lhs, holder.getInterval());
        Assert.assertEquals(pair.rhs.lhs, holder.getVersion());
        final List<PartitionChunk<OvershadowableInteger>> expectedChunks = Lists.newArrayList(pair.rhs.rhs);
        final List<PartitionChunk<OvershadowableInteger>> actualChunks = Lists.newArrayList(holder.getObject());
        Assert.assertEquals(expectedChunks.size(), actualChunks.size());
        for (int i = 0; i < expectedChunks.size(); i++) {
            // Chunk equality covers the partition position, not necessarily the
            // payload, so the wrapped objects are compared explicitly below.
            Assert.assertEquals(expectedChunks.get(i), actualChunks.get(i));
            final OvershadowableInteger expectedInteger = expectedChunks.get(i).getObject();
            final OvershadowableInteger actualInteger = actualChunks.get(i).getObject();
            Assert.assertEquals(expectedInteger, actualInteger);
        }
    }
}
Also used : PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) OvershadowableInteger(org.apache.druid.timeline.partition.OvershadowableInteger) NumberedPartitionChunk(org.apache.druid.timeline.partition.NumberedPartitionChunk) SingleElementPartitionChunk(org.apache.druid.timeline.partition.SingleElementPartitionChunk) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) NumberedOverwritingPartitionChunk(org.apache.druid.timeline.partition.NumberedOverwritingPartitionChunk) Pair(org.apache.druid.java.util.common.Pair) Interval(org.joda.time.Interval)
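
A hedged sketch of constructing one expected entry for assertValues; the interval, version string, and integer payloads are invented, and it assumes the PartitionHolder list constructor and the NumberedPartitionChunk.make factory used elsewhere in these tests:

import com.google.common.collect.ImmutableList;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.java.util.common.Pair;
import org.apache.druid.timeline.partition.NumberedPartitionChunk;
import org.apache.druid.timeline.partition.OvershadowableInteger;
import org.apache.druid.timeline.partition.PartitionChunk;
import org.apache.druid.timeline.partition.PartitionHolder;
import org.joda.time.Interval;

public class ExpectedEntrySketch {
    // One expected (interval, (version, chunks)) tuple with two numbered chunks.
    static Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>> exampleExpectedEntry() {
        return Pair.of(
            Intervals.of("2011-04-01/2011-04-03"),
            Pair.of(
                "v1",
                new PartitionHolder<>(ImmutableList.<PartitionChunk<OvershadowableInteger>>of(
                    NumberedPartitionChunk.make(0, 2, new OvershadowableInteger("v1", 0, 60)),
                    NumberedPartitionChunk.make(1, 2, new OvershadowableInteger("v1", 1, 61))
                ))
            )
        );
    }
}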

Example 4 with PartitionChunk

Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.

From the class IngestSegmentFirehoseFactoryTest, the method testGetUniqueDimensionsAndMetrics:

@Test
public void testGetUniqueDimensionsAndMetrics() {
    final int numSegmentsPerPartitionChunk = 5;
    final int numPartitionChunksPerTimelineObject = 10;
    final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
    final Interval interval = Intervals.of("2017-01-01/2017-01-02");
    final String version = "1";
    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
    for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
        final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
        for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
            final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "dim" + suffix).collect(Collectors.toList());
            final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "met" + suffix).collect(Collectors.toList());
            final DataSegment segment = new DataSegment("ds", interval, version, ImmutableMap.of(), dims, metrics, new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), 1, 1);
            final PartitionChunk<DataSegment> partitionChunk = new NumberedPartitionChunk<>(i, numPartitionChunksPerTimelineObject, segment);
            chunks.add(partitionChunk);
        }
        final TimelineObjectHolder<String, DataSegment> timelineHolder = new TimelineObjectHolder<>(interval, version, new PartitionHolder<>(chunks));
        timelineSegments.add(timelineHolder);
    }
    final String[] expectedDims = new String[] { "dim9", "dim10", "dim11", "dim12", "dim13", "dim8", "dim7", "dim6", "dim5", "dim4", "dim3", "dim2", "dim1", "dim0" };
    final String[] expectedMetrics = new String[] { "met9", "met10", "met11", "met12", "met13", "met8", "met7", "met6", "met5", "met4", "met3", "met2", "met1", "met0" };
    Assert.assertEquals(Arrays.asList(expectedDims), ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null));
    Assert.assertEquals(Arrays.asList(expectedMetrics), ReingestionTimelineUtils.getUniqueMetrics(timelineSegments));
}
Also used : Module(com.google.inject.Module) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Arrays(java.util.Arrays) NumberedPartitionChunk(org.apache.druid.timeline.partition.NumberedPartitionChunk) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) JodaUtils(org.apache.druid.java.util.common.JodaUtils) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) Task(org.apache.druid.indexing.common.task.Task) After(org.junit.After) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) FileUtils(org.apache.druid.java.util.common.FileUtils) Parameterized(org.junit.runners.Parameterized) RetryPolicyConfig(org.apache.druid.indexing.common.RetryPolicyConfig) AfterClass(org.junit.AfterClass) ImmutableSet(com.google.common.collect.ImmutableSet) LocalLoadSpec(org.apache.druid.segment.loading.LocalLoadSpec) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) ImmutableMap(com.google.common.collect.ImmutableMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CombiningFirehoseFactory(org.apache.druid.segment.realtime.firehose.CombiningFirehoseFactory) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) Collectors(java.util.stream.Collectors) GuiceInjectors(org.apache.druid.guice.GuiceInjectors) TestUtils(org.apache.druid.indexing.common.TestUtils) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) InputRow(org.apache.druid.data.input.InputRow) NoopTask(org.apache.druid.indexing.common.task.NoopTask) Firehose(org.apache.druid.data.input.Firehose) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) GuiceAnnotationIntrospector(org.apache.druid.guice.GuiceAnnotationIntrospector) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) BeforeClass(org.junit.BeforeClass) Intervals(org.apache.druid.java.util.common.Intervals) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) RunWith(org.junit.runner.RunWith) AnnotationIntrospectorPair(com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) 
ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) Binder(com.google.inject.Binder) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) Before(org.junit.Before) ReingestionTimelineUtils(org.apache.druid.indexing.common.ReingestionTimelineUtils) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) HeapMemoryTaskStorage(org.apache.druid.indexing.overlord.HeapMemoryTaskStorage) Paths(java.nio.file.Paths) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) IndexIO(org.apache.druid.segment.IndexIO) GuiceInjectableValues(org.apache.druid.guice.GuiceInjectableValues) TemporaryFolder(org.junit.rules.TemporaryFolder)
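
The expected arrays above (dim9, dim10, ..., dim13, dim8, ..., dim0) encode the utility's behavior of scanning timeline objects newest-first and keeping the first occurrence of each name. A hedged re-implementation of just that deduplication idea, not the actual ReingestionTimelineUtils code:

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Set;

public class UniqueNamesSketch {
    // Dedupe names across per-object lists, letting the newest list's order win.
    static List<String> uniqueNewestFirst(List<List<String>> namesOldestFirst) {
        final Set<String> seen = new LinkedHashSet<>();
        // Walk newest timeline objects first so their names are kept in order;
        // older occurrences of an already-seen name are ignored.
        for (int i = namesOldestFirst.size() - 1; i >= 0; i--) {
            seen.addAll(namesOldestFirst.get(i));
        }
        return new ArrayList<>(seen);
    }
}

Fed the test's per-chunk lists (dim0..dim4 up through dim9..dim13), this returns exactly the expectedDims ordering asserted above.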

Example 5 with PartitionChunk

Use of org.apache.druid.timeline.partition.PartitionChunk in project druid by druid-io.

From the class IngestSegmentFirehoseFactory, the method connect:

@Override
public Firehose connect(InputRowParser inputRowParser, File temporaryDirectory) throws ParseException {
    log.debug("Connecting firehose: dataSource[%s], interval[%s], segmentIds[%s]", dataSource, interval, segmentIds);
    final List<TimelineObjectHolder<String, DataSegment>> timeLineSegments = getTimeline();
    // Download all segments locally.
    // Note: this requires enough local storage space to fit all of the segments, even though
    // IngestSegmentFirehose iterates over the segments in series. We may want to change this
    // to download files lazily, perhaps sharing code with PrefetchableTextFilesFirehoseFactory.
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    Map<DataSegment, File> segmentFileMap = Maps.newLinkedHashMap();
    for (TimelineObjectHolder<String, DataSegment> holder : timeLineSegments) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            final DataSegment segment = chunk.getObject();
            segmentFileMap.computeIfAbsent(segment, k -> {
                try {
                    return segmentCacheManager.getSegmentFiles(segment);
                } catch (SegmentLoadingException e) {
                    throw new RuntimeException(e);
                }
            });
        }
    }
    final List<String> dims = ReingestionTimelineUtils.getDimensionsToReingest(dimensions, inputRowParser.getParseSpec().getDimensionsSpec(), timeLineSegments);
    final List<String> metricsList = metrics == null ? ReingestionTimelineUtils.getUniqueMetrics(timeLineSegments) : metrics;
    final List<WindowedStorageAdapter> adapters = Lists.newArrayList(Iterables.concat(Iterables.transform(timeLineSegments, new Function<TimelineObjectHolder<String, DataSegment>, Iterable<WindowedStorageAdapter>>() {

        @Override
        public Iterable<WindowedStorageAdapter> apply(final TimelineObjectHolder<String, DataSegment> holder) {
            return Iterables.transform(holder.getObject(), new Function<PartitionChunk<DataSegment>, WindowedStorageAdapter>() {

                @Override
                public WindowedStorageAdapter apply(final PartitionChunk<DataSegment> input) {
                    final DataSegment segment = input.getObject();
                    try {
                        return new WindowedStorageAdapter(new QueryableIndexStorageAdapter(indexIO.loadIndex(Preconditions.checkNotNull(segmentFileMap.get(segment), "File for segment %s", segment.getId()))), holder.getInterval());
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }
            });
        }
    })));
    final TransformSpec transformSpec = TransformSpec.fromInputRowParser(inputRowParser);
    return new IngestSegmentFirehose(adapters, transformSpec, dims, metricsList, dimFilter);
}
Also used : IngestSegmentFirehose(org.apache.druid.segment.realtime.firehose.IngestSegmentFirehose) SegmentLoadingException(org.apache.druid.segment.loading.SegmentLoadingException) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) IOException(java.io.IOException) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) Function(com.google.common.base.Function) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) File(java.io.File) WindowedStorageAdapter(org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter)
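
The two nested anonymous Function classes flatten each holder into one WindowedStorageAdapter per chunk, windowed to the holder's interval. The same wiring as a flat stream pipeline, offered as a hedged readability sketch rather than the shipped code (Druid elsewhere deliberately avoids streams on hot paths):

import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.StreamSupport;
import com.google.common.base.Preconditions;
import org.apache.druid.segment.IndexIO;
import org.apache.druid.segment.QueryableIndexStorageAdapter;
import org.apache.druid.segment.realtime.firehose.WindowedStorageAdapter;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.TimelineObjectHolder;
import org.apache.druid.timeline.partition.PartitionChunk;

public class AdapterWiringSketch {
    // Builds one WindowedStorageAdapter per PartitionChunk across all holders.
    static List<WindowedStorageAdapter> buildAdapters(
        List<TimelineObjectHolder<String, DataSegment>> timeLineSegments,
        Map<DataSegment, File> segmentFileMap,
        IndexIO indexIO
    ) {
        return timeLineSegments
            .stream()
            .flatMap(holder -> StreamSupport.stream(holder.getObject().spliterator(), false)
                .map(chunk -> {
                    final DataSegment segment = chunk.getObject();
                    // The segment must already have been downloaded into the map.
                    final File file = Preconditions.checkNotNull(
                        segmentFileMap.get(segment), "File for segment %s", segment.getId());
                    try {
                        return new WindowedStorageAdapter(
                            new QueryableIndexStorageAdapter(indexIO.loadIndex(file)),
                            holder.getInterval());
                    } catch (IOException e) {
                        throw new RuntimeException(e);
                    }
                }))
            .collect(Collectors.toList());
    }
}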

Aggregations

PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk): 8
Interval (org.joda.time.Interval): 7
Preconditions (com.google.common.base.Preconditions): 5
DataSegment (org.apache.druid.timeline.DataSegment): 5
TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder): 5
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 4
IOException (java.io.IOException): 4
ArrayList (java.util.ArrayList): 4
Collection (java.util.Collection): 4
List (java.util.List): 4
Map (java.util.Map): 4
Collectors (java.util.stream.Collectors): 4
Intervals (org.apache.druid.java.util.common.Intervals): 4
StringUtils (org.apache.druid.java.util.common.StringUtils): 4
VisibleForTesting (com.google.common.annotations.VisibleForTesting): 3
ImmutableList (com.google.common.collect.ImmutableList): 3
Set (java.util.Set): 3
DateTimes (org.apache.druid.java.util.common.DateTimes): 3
Pair (org.apache.druid.java.util.common.Pair): 3
TransformSpec (org.apache.druid.segment.transform.TransformSpec): 3