Example 1 with PartitionHolder

Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.

From class DruidInputSource, method getTimelineForSegmentIds:

public static List<TimelineObjectHolder<String, DataSegment>> getTimelineForSegmentIds(CoordinatorClient coordinatorClient, String dataSource, List<WindowedSegmentId> segmentIds) {
    // Group chunks by exact interval; segments sharing an interval must share a version.
    final SortedMap<Interval, TimelineObjectHolder<String, DataSegment>> timeline = new TreeMap<>(Comparators.intervalsByStartThenEnd());
    for (WindowedSegmentId windowedSegmentId : Preconditions.checkNotNull(segmentIds, "segmentIds")) {
        final DataSegment segment = coordinatorClient.fetchUsedSegment(dataSource, windowedSegmentId.getSegmentId());
        for (Interval interval : windowedSegmentId.getIntervals()) {
            final TimelineObjectHolder<String, DataSegment> existingHolder = timeline.get(interval);
            if (existingHolder != null) {
                if (!existingHolder.getVersion().equals(segment.getVersion())) {
                    throw new ISE("Timeline segments with the same interval should have the same version: " + "existing version[%s] vs new segment[%s]", existingHolder.getVersion(), segment);
                }
                existingHolder.getObject().add(segment.getShardSpec().createChunk(segment));
            } else {
                timeline.put(interval, new TimelineObjectHolder<>(interval, segment.getInterval(), segment.getVersion(), new PartitionHolder<>(segment.getShardSpec().createChunk(segment))));
            }
        }
    }
    // Validate that none of the given windows overlaps (except for when multiple segments share exactly the
    // same interval).
    Interval lastInterval = null;
    for (Interval interval : timeline.keySet()) {
        if (lastInterval != null && interval.overlaps(lastInterval)) {
            throw new IAE("Distinct intervals in input segments may not overlap: [%s] vs [%s]", lastInterval, interval);
        }
        lastInterval = interval;
    }
    return new ArrayList<>(timeline.values());
}
Also used: PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) ArrayList(java.util.ArrayList) TreeMap(java.util.TreeMap) WindowedSegmentId(org.apache.druid.indexing.firehose.WindowedSegmentId) IAE(org.apache.druid.java.util.common.IAE) DataSegment(org.apache.druid.timeline.DataSegment) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) ISE(org.apache.druid.java.util.common.ISE) Interval(org.joda.time.Interval)
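
The heart of this method is the validation pass: because the TreeMap orders intervals by start-then-end, comparing each interval against the previous key is enough to detect any partial overlap. A self-contained sketch of that pattern in plain Java (SimpleInterval is a hypothetical stand-in for org.joda.time.Interval):

import java.util.List;

// Hypothetical stand-in for org.joda.time.Interval; illustration only.
record SimpleInterval(long start, long end) {
    boolean overlaps(SimpleInterval other) {
        // Half-open intervals [start, end) overlap iff each starts before the other ends.
        return start < other.end && other.start < end;
    }
}

final class OverlapValidation {
    // Mirrors the loop above: the input must already be sorted by start (then end)
    // and de-duplicated, which the TreeMap keySet guarantees in the real method.
    static void validateDisjoint(List<SimpleInterval> sortedDistinctIntervals) {
        SimpleInterval last = null;
        for (SimpleInterval interval : sortedDistinctIntervals) {
            if (last != null && interval.overlaps(last)) {
                throw new IllegalArgumentException(
                        "Distinct intervals may not overlap: " + last + " vs " + interval);
            }
            last = interval;
        }
    }
}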

Example 2 with PartitionHolder

Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.

From class VersionedIntervalTimelineTestBase, method assertValues:

static void assertValues(List<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expected, List<TimelineObjectHolder<String, OvershadowableInteger>> actual) {
    Assert.assertEquals("Sizes did not match.", expected.size(), actual.size());
    Iterator<Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>>> expectedIter = expected.iterator();
    Iterator<TimelineObjectHolder<String, OvershadowableInteger>> actualIter = actual.iterator();
    while (expectedIter.hasNext()) {
        Pair<Interval, Pair<String, PartitionHolder<OvershadowableInteger>>> pair = expectedIter.next();
        TimelineObjectHolder<String, OvershadowableInteger> holder = actualIter.next();
        Assert.assertEquals(pair.lhs, holder.getInterval());
        Assert.assertEquals(pair.rhs.lhs, holder.getVersion());
        final List<PartitionChunk<OvershadowableInteger>> expectedChunks = Lists.newArrayList(pair.rhs.rhs);
        final List<PartitionChunk<OvershadowableInteger>> actualChunks = Lists.newArrayList(holder.getObject());
        Assert.assertEquals(expectedChunks.size(), actualChunks.size());
        for (int i = 0; i < expectedChunks.size(); i++) {
            // Check partitionNumber first
            Assert.assertEquals(expectedChunks.get(i), actualChunks.get(i));
            final OvershadowableInteger expectedInteger = expectedChunks.get(i).getObject();
            final OvershadowableInteger actualInteger = actualChunks.get(i).getObject();
            Assert.assertEquals(expectedInteger, actualInteger);
        }
    }
}
Also used: PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) OvershadowableInteger(org.apache.druid.timeline.partition.OvershadowableInteger) NumberedPartitionChunk(org.apache.druid.timeline.partition.NumberedPartitionChunk) SingleElementPartitionChunk(org.apache.druid.timeline.partition.SingleElementPartitionChunk) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) NumberedOverwritingPartitionChunk(org.apache.druid.timeline.partition.NumberedOverwritingPartitionChunk) Pair(org.apache.druid.java.util.common.Pair) Interval(org.joda.time.Interval)
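
Note that the up-front size assertion is load-bearing: the loop drains only expectedIter, so without it trailing elements in actual would pass unchecked. A generic sketch of the same zip-and-compare pattern (plain JUnit 4, element type left abstract):

import java.util.Iterator;
import java.util.List;
import org.junit.Assert;

final class ZipAssert {
    // Compare two lists element by element. The size check makes driving the
    // loop off a single iterator safe: nothing in 'actual' is left unmatched.
    static <T> void assertSameElements(List<T> expected, List<T> actual) {
        Assert.assertEquals("Sizes did not match.", expected.size(), actual.size());
        Iterator<T> expectedIter = expected.iterator();
        Iterator<T> actualIter = actual.iterator();
        while (expectedIter.hasNext()) {
            Assert.assertEquals(expectedIter.next(), actualIter.next());
        }
    }
}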

Example 3 with PartitionHolder

Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.

From class VersionedIntervalTimeline, method addAll:

public void addAll(final Iterator<PartitionChunkEntry<VersionType, ObjectType>> objects) {
    lock.writeLock().lock();
    try {
        // Entries touched in this batch; their completeness is evaluated once, below.
        final IdentityHashMap<TimelineEntry, Interval> allEntries = new IdentityHashMap<>();
        while (objects.hasNext()) {
            PartitionChunkEntry<VersionType, ObjectType> chunkEntry = objects.next();
            PartitionChunk<ObjectType> object = chunkEntry.getChunk();
            Interval interval = chunkEntry.getInterval();
            VersionType version = chunkEntry.getVersion();
            Map<VersionType, TimelineEntry> exists = allTimelineEntries.get(interval);
            TimelineEntry entry;
            if (exists == null) {
                entry = new TimelineEntry(interval, version, new PartitionHolder<>(object));
                TreeMap<VersionType, TimelineEntry> versionEntry = new TreeMap<>(versionComparator);
                versionEntry.put(version, entry);
                allTimelineEntries.put(interval, versionEntry);
                numObjects.incrementAndGet();
            } else {
                entry = exists.get(version);
                if (entry == null) {
                    entry = new TimelineEntry(interval, version, new PartitionHolder<>(object));
                    exists.put(version, entry);
                    numObjects.incrementAndGet();
                } else {
                    PartitionHolder<ObjectType> partitionHolder = entry.getPartitionHolder();
                    if (partitionHolder.add(object)) {
                        numObjects.incrementAndGet();
                    }
                }
            }
            allEntries.put(entry, interval);
        }
        // "isComplete" is O(objects in holder) so defer it to the end of addAll.
        for (Entry<TimelineEntry, Interval> entry : allEntries.entrySet()) {
            Interval interval = entry.getValue();
            if (entry.getKey().getPartitionHolder().isComplete()) {
                add(completePartitionsTimeline, interval, entry.getKey());
            }
            add(incompletePartitionsTimeline, interval, entry.getKey());
        }
    } finally {
        lock.writeLock().unlock();
    }
}
Also used: PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) IdentityHashMap(java.util.IdentityHashMap) TreeMap(java.util.TreeMap) Interval(org.joda.time.Interval)
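
A hedged usage sketch of this batch API: assuming PartitionChunkEntry is the public nested class of VersionedIntervalTimeline with an (interval, version, chunk) constructor matching the getters above, and that the timeline was built with a String version comparator, adding a batch of segments might look like this:

import java.util.List;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.VersionedIntervalTimeline;
import org.apache.druid.timeline.VersionedIntervalTimeline.PartitionChunkEntry;

// 'segments' is an assumed List<DataSegment>. Adding them in one addAll call,
// rather than one add per chunk, defers the O(n) isComplete check to the end
// of the batch, as the comment in addAll explains.
static void addSegments(VersionedIntervalTimeline<String, DataSegment> timeline,
                        List<DataSegment> segments) {
    timeline.addAll(
        segments.stream()
                .map(segment -> new PartitionChunkEntry<String, DataSegment>(
                        segment.getInterval(),
                        segment.getVersion(),
                        segment.getShardSpec().createChunk(segment)))
                .iterator());
}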

Example 4 with PartitionHolder

Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.

From class IngestSegmentFirehoseFactoryTest, method testGetUniqueDimensionsAndMetrics:

@Test
public void testGetUniqueDimensionsAndMetrics() {
    final int numSegmentsPerPartitionChunk = 5;
    final int numPartitionChunksPerTimelineObject = 10;
    final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
    final Interval interval = Intervals.of("2017-01-01/2017-01-02");
    final String version = "1";
    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
    for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
        final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
        for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
            final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "dim" + suffix).collect(Collectors.toList());
            final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "met" + suffix).collect(Collectors.toList());
            final DataSegment segment = new DataSegment("ds", interval, version, ImmutableMap.of(), dims, metrics, new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), 1, 1);
            final PartitionChunk<DataSegment> partitionChunk = new NumberedPartitionChunk<>(i, numPartitionChunksPerTimelineObject, segment);
            chunks.add(partitionChunk);
        }
        final TimelineObjectHolder<String, DataSegment> timelineHolder = new TimelineObjectHolder<>(interval, version, new PartitionHolder<>(chunks));
        timelineSegments.add(timelineHolder);
    }
    final String[] expectedDims = new String[] { "dim9", "dim10", "dim11", "dim12", "dim13", "dim8", "dim7", "dim6", "dim5", "dim4", "dim3", "dim2", "dim1", "dim0" };
    final String[] expectedMetrics = new String[] { "met9", "met10", "met11", "met12", "met13", "met8", "met7", "met6", "met5", "met4", "met3", "met2", "met1", "met0" };
    Assert.assertEquals(Arrays.asList(expectedDims), ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null));
    Assert.assertEquals(Arrays.asList(expectedMetrics), ReingestionTimelineUtils.getUniqueMetrics(timelineSegments));
}
Also used: PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) NumberedPartitionChunk(org.apache.druid.timeline.partition.NumberedPartitionChunk) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval) Test(org.junit.Test)
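
The expected arrays encode the contract under test: names are de-duplicated while walking the timeline from the newest holder to the oldest, so the last holder's dimensions (dim9..dim13) lead the result and older holders only contribute names not yet seen. A minimal sketch of that de-duplication pattern in plain Java (hypothetical helper; the real logic lives in ReingestionTimelineUtils):

import java.util.ArrayList;
import java.util.LinkedHashSet;
import java.util.List;

final class UniqueFromNewest {
    // Walk the outer list from newest to oldest; LinkedHashSet keeps the first
    // occurrence of each name, so the most recent holder's ordering wins.
    static List<String> unique(List<List<String>> namesPerHolder) {
        LinkedHashSet<String> unique = new LinkedHashSet<>();
        for (int i = namesPerHolder.size() - 1; i >= 0; i--) {
            unique.addAll(namesPerHolder.get(i));
        }
        return new ArrayList<>(unique);
    }
}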

Example 5 with PartitionHolder

Use of org.apache.druid.timeline.partition.PartitionHolder in project druid by druid-io.

From class DruidInputSource, method fixedFormatReader:

@Override
protected InputSourceReader fixedFormatReader(InputRowSchema inputRowSchema, @Nullable File temporaryDirectory) {
    final SegmentCacheManager segmentCacheManager = segmentCacheManagerFactory.manufacturate(temporaryDirectory);
    final List<TimelineObjectHolder<String, DataSegment>> timeline = createTimeline();
    final Iterator<DruidSegmentInputEntity> entityIterator = FluentIterable.from(timeline).transformAndConcat(holder -> {
        // noinspection ConstantConditions
        final PartitionHolder<DataSegment> partitionHolder = holder.getObject();
        // noinspection ConstantConditions
        return FluentIterable.from(partitionHolder).transform(chunk -> new DruidSegmentInputEntity(segmentCacheManager, chunk.getObject(), holder.getInterval()));
    }).iterator();
    final DruidSegmentInputFormat inputFormat = new DruidSegmentInputFormat(indexIO, dimFilter);
    final InputRowSchema inputRowSchemaToUse;
    if (taskConfig.isIgnoreTimestampSpecForDruidInputSource()) {
        // Legacy compatibility mode; see https://github.com/apache/druid/pull/10267.
        LOG.warn("Ignoring the provided timestampSpec and reading the __time column instead. To use timestampSpecs with " + "the 'druid' input source, set druid.indexer.task.ignoreTimestampSpecForDruidInputSource to false.");
        inputRowSchemaToUse = new InputRowSchema(new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, STANDARD_TIME_COLUMN_FORMATS.iterator().next(), null), inputRowSchema.getDimensionsSpec(), inputRowSchema.getColumnsFilter().plus(ColumnHolder.TIME_COLUMN_NAME));
    } else {
        inputRowSchemaToUse = inputRowSchema;
    }
    if (ColumnHolder.TIME_COLUMN_NAME.equals(inputRowSchemaToUse.getTimestampSpec().getTimestampColumn()) && !STANDARD_TIME_COLUMN_FORMATS.contains(inputRowSchemaToUse.getTimestampSpec().getTimestampFormat())) {
        // Slight chance the user did this intentionally, but not likely. Log a warning.
        LOG.warn("The provided timestampSpec refers to the %s column without using format %s. If you wanted to read the " + "column as-is, switch formats.", inputRowSchemaToUse.getTimestampSpec().getTimestampColumn(), STANDARD_TIME_COLUMN_FORMATS);
    }
    return new InputEntityIteratingReader(inputRowSchemaToUse, inputFormat, entityIterator, temporaryDirectory);
}
Also used: SegmentCacheManager(org.apache.druid.segment.loading.SegmentCacheManager) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) InputRowSchema(org.apache.druid.data.input.InputRowSchema) InputEntityIteratingReader(org.apache.druid.data.input.impl.InputEntityIteratingReader) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) Interval(org.joda.time.Interval)
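
The core move in fixedFormatReader is flattening a two-level structure, timeline holders containing partition chunks, into a single lazy iterator of input entities. A minimal sketch of the same transformAndConcat pattern with Guava (Holder and Chunk are hypothetical stand-ins for TimelineObjectHolder and PartitionChunk):

import com.google.common.collect.FluentIterable;
import java.util.Iterator;
import java.util.List;

// Hypothetical stand-ins for TimelineObjectHolder and PartitionChunk.
record Chunk(String payload) {}
record Holder(String interval, List<Chunk> chunks) {}

final class FlattenTimeline {
    // One output element per chunk, tagged with its holder's interval;
    // FluentIterable keeps the whole pipeline lazy, like the reader above.
    static Iterator<String> entities(List<Holder> timeline) {
        return FluentIterable.from(timeline)
                .transformAndConcat(holder ->
                        FluentIterable.from(holder.chunks())
                                .transform(chunk -> holder.interval() + ":" + chunk.payload()))
                .iterator();
    }
}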

Aggregations

PartitionHolder (org.apache.druid.timeline.partition.PartitionHolder) 6
Interval (org.joda.time.Interval) 6
ArrayList (java.util.ArrayList) 4
TimelineObjectHolder (org.apache.druid.timeline.TimelineObjectHolder) 4
TreeMap (java.util.TreeMap) 3
DataSegment (org.apache.druid.timeline.DataSegment) 3
PartitionChunk (org.apache.druid.timeline.partition.PartitionChunk) 3
Preconditions (com.google.common.base.Preconditions) 2
ImmutableList (com.google.common.collect.ImmutableList) 2
File (java.io.File) 2
Collection (java.util.Collection) 2
List (java.util.List) 2
Map (java.util.Map) 2
CoordinatorClient (org.apache.druid.client.coordinator.CoordinatorClient) 2
TimestampSpec (org.apache.druid.data.input.impl.TimestampSpec) 2
RetryPolicyFactory (org.apache.druid.indexing.common.RetryPolicyFactory) 2
SegmentCacheManagerFactory (org.apache.druid.indexing.common.SegmentCacheManagerFactory) 2
Logger (org.apache.druid.java.util.common.logger.Logger) 2
IndexIO (org.apache.druid.segment.IndexIO) 2
ColumnHolder (org.apache.druid.segment.column.ColumnHolder) 2