Example 56 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, the method testIteratorReturnsSegmentsAsCompactionStateChangedWithCompactedStateHasSameSegmentGranularity.

@Test
public void testIteratorReturnsSegmentsAsCompactionStateChangedWithCompactedStateHasSameSegmentGranularity() {
    // Different indexSpec as what is set in the auto compaction config
    IndexSpec newIndexSpec = new IndexSpec(new ConciseBitmapSerdeFactory(), null, null, null);
    Map<String, Object> newIndexSpecMap = mapper.convertValue(newIndexSpec, new TypeReference<Map<String, Object>>() {
    });
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, newIndexSpecMap, null)));
    // Duration of new segmentGranularity is the same as before (P1D)
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(
                130000,
                new Period("P0D"),
                new UserCompactionTaskGranularityConfig(
                    new PeriodGranularity(new Period("P1D"), null, DateTimeZone.UTC), null, null))),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // We should get all segments in timeline back since indexSpec changed
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-03T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
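The recompaction trigger in this test is the indexSpec mismatch. Below is a minimal sketch of that idea (not NewestSegmentFirstPolicy's internal code), reusing only classes already imported above; the variable names are illustrative, and the assumption that the auto compaction config falls back to a default, non-concise IndexSpec is taken from the test's comments.

    // Minimal sketch, not the policy's actual code: segments carry the IndexSpec they were
    // last compacted with (inside CompactionState) serialized as a Map, and a mismatch with
    // the configured IndexSpec makes them eligible for compaction again.
    ObjectMapper sketchMapper = new DefaultObjectMapper();
    Map<String, Object> storedIndexSpec = sketchMapper.convertValue(
        new IndexSpec(new ConciseBitmapSerdeFactory(), null, null, null),
        new TypeReference<Map<String, Object>>() {});
    Map<String, Object> configuredIndexSpec = sketchMapper.convertValue(
        // default IndexSpec, which does not use concise bitmaps
        new IndexSpec(),
        new TypeReference<Map<String, Object>>() {});
    boolean needsRecompaction = !configuredIndexSpec.equals(storedIndexSpec); // true here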

Example 57 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, the method testIteratorReturnsSegmentsInMultipleIntervalIfConfiguredSegmentGranularityCrossBoundary.

@Test
public void testIteratorReturnsSegmentsInMultipleIntervalIfConfiguredSegmentGranularityCrossBoundary() {
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2020-01-01/2020-01-08"), new Period("P7D")),
        new SegmentGenerateSpec(Intervals.of("2020-01-28/2020-02-03"), new Period("P7D")),
        new SegmentGenerateSpec(Intervals.of("2020-02-08/2020-02-15"), new Period("P7D")));
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(
                130000,
                new Period("P0D"),
                new UserCompactionTaskGranularityConfig(Granularities.MONTH, null, null))),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // We should get the segment of "2020-01-28/2020-02-03" back twice when the iterator returns for Jan and when the
    // iterator returns for Feb.
    // Month of Feb
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2020-01-28/2020-02-15"), Partitions.ONLY_COMPLETE));
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> actual = iterator.next();
    Assert.assertEquals(expectedSegmentsToCompact.size(), actual.size());
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(actual));
    // Month of Jan
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2020-01-01/2020-02-03"), Partitions.ONLY_COMPLETE));
    Assert.assertTrue(iterator.hasNext());
    actual = iterator.next();
    Assert.assertEquals(expectedSegmentsToCompact.size(), actual.size());
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(actual));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used : ArrayList(java.util.ArrayList) Period(org.joda.time.Period) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) DataSegment(org.apache.druid.timeline.DataSegment) Test(org.junit.Test)
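The reason the 2020-01-28/2020-02-03 segment is handed back twice is the MONTH segmentGranularity: the segment straddles the month boundary, so it falls into both the January and the February buckets. A small illustrative sketch (not part of the test) using the same Granularities and Intervals utilities:

    // Sketch: a segment interval crossing a month boundary overlaps two MONTH buckets.
    Interval crossing = Intervals.of("2020-01-28/2020-02-03");
    Interval janBucket = Granularities.MONTH.bucket(crossing.getStart()); // 2020-01-01/2020-02-01
    Interval febBucket = Granularities.MONTH.bucket(crossing.getEnd());   // 2020-02-01/2020-03-01
    // Both buckets overlap the segment, so the iterator returns it for Feb and again for Jan.
    boolean returnedTwice = janBucket.overlaps(crossing) && febBucket.overlaps(crossing); // true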

Example 58 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, the method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentFilter.

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentFilter() throws Exception {
    NullHandling.initializeForTests();
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have
    // filter=SelectorDimFilter("dim1", "foo", null) for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
    // filter=SelectorDimFilter("dim1", "bar", null) for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
    // filter=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (filter was not set during last compaction)
    // and transformSpec=null for interval 2017-10-04T00:00:00/2017-10-05T00:00:00 (transformSpec was not set during last compaction)
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null,
                mapper.readValue(
                    mapper.writeValueAsString(new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null)),
                    new TypeReference<Map<String, Object>>() {}),
                indexSpec, null)),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null,
                mapper.readValue(
                    mapper.writeValueAsString(new TransformSpec(new SelectorDimFilter("dim1", "bar", null), null)),
                    new TypeReference<Map<String, Object>>() {}),
                indexSpec, null)),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null,
                mapper.readValue(
                    mapper.writeValueAsString(new TransformSpec(null, null)),
                    new TypeReference<Map<String, Object>>() {}),
                indexSpec, null)),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, null)));
    // Auto compaction config sets filter=SelectorDimFilter("dim1", "bar", null)
    CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(
                130000,
                new Period("P0D"),
                null,
                null,
                new UserCompactionTaskTransformConfig(new SelectorDimFilter("dim1", "bar", null)),
                null)),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // We should get interval 2017-10-01T00:00:00/2017-10-02T00:00:00, interval 2017-10-04T00:00:00/2017-10-05T00:00:00, and interval 2017-10-03T00:00:00/2017-10-04T00:00:00.
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
    // Auto compaction config sets filter=null
    iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(130000, new Period("P0D"), null, null, new UserCompactionTaskTransformConfig(null), null)),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) UserCompactionTaskTransformConfig(org.apache.druid.server.coordinator.UserCompactionTaskTransformConfig) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) CompactionState(org.apache.druid.timeline.CompactionState) TypeReference(com.fasterxml.jackson.core.type.TypeReference) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
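The comparison driving this test is between the transformSpec (filter) stored in each segment's CompactionState and the filter in the auto compaction config. A minimal sketch of that idea follows; it is not the policy's code, and convertValue is used instead of the readValue/writeValueAsString round trip above purely to avoid checked exceptions.

    // Sketch only: a stored filter of "foo" differs from the configured "bar",
    // so that segment is returned; the segment already compacted with "bar" is skipped.
    // (SelectorDimFilter needs NullHandling.initializeForTests(), as in the test above.)
    ObjectMapper sketchMapper = new DefaultObjectMapper();
    Map<String, Object> storedTransform = sketchMapper.convertValue(
        new TransformSpec(new SelectorDimFilter("dim1", "foo", null), null),
        new TypeReference<Map<String, Object>>() {});
    Map<String, Object> configuredTransform = sketchMapper.convertValue(
        new TransformSpec(new SelectorDimFilter("dim1", "bar", null), null),
        new TypeReference<Map<String, Object>>() {});
    boolean needsRecompaction = !configuredTransform.equals(storedTransform); // true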

Example 59 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, the method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentOrigin.

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentOrigin() {
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have segmentGranularity=DAY
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, null)));
    // Duration of new segmentGranularity is the same as before (P1D), but we changed the origin in the autocompaction spec
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(
            DATA_SOURCE,
            createCompactionConfig(
                130000,
                new Period("P0D"),
                new UserCompactionTaskGranularityConfig(
                    new PeriodGranularity(new Period("P1D"), DateTimes.of("2012-01-02T00:05:00.000Z"), DateTimeZone.UTC),
                    null,
                    null))),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // We should get all segments in timeline back since the granularity origin changed and the skip offset is P0D.
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-03T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
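What changed here is only the granularity's origin. A short sketch of why a shifted origin makes an otherwise identical P1D granularity produce different buckets; bucketStart is a standard Granularity method, and the example timestamps are illustrative, not taken from the test.

    // Sketch: same P1D period, but a non-midnight origin shifts every bucket boundary,
    // so segments aligned to plain DAY no longer match the configured granularity.
    PeriodGranularity plainDay = new PeriodGranularity(new Period("P1D"), null, DateTimeZone.UTC);
    PeriodGranularity shiftedDay = new PeriodGranularity(
        new Period("P1D"), DateTimes.of("2012-01-02T00:05:00.000Z"), DateTimeZone.UTC);
    DateTime time = DateTimes.of("2017-10-02T12:00:00Z");
    // plainDay.bucketStart(time)   -> 2017-10-02T00:00:00.000Z
    // shiftedDay.bucketStart(time) -> 2017-10-02T00:05:00.000Z
    boolean sameBuckets = plainDay.bucketStart(time).equals(shiftedDay.bucketStart(time)); // false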

Example 60 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From class NewestSegmentFirstPolicyTest, the helper method createTimeline.

private static VersionedIntervalTimeline<String, DataSegment> createTimeline(SegmentGenerateSpec... specs) {
    List<DataSegment> segments = new ArrayList<>();
    final String version = DateTimes.nowUtc().toString();
    final List<SegmentGenerateSpec> orderedSpecs = Arrays.asList(specs);
    orderedSpecs.sort(Comparator.comparing(s -> s.totalInterval, Comparators.intervalsByStartThenEnd().reversed()));
    for (SegmentGenerateSpec spec : orderedSpecs) {
        Interval remainingInterval = spec.totalInterval;
        while (!Intervals.isEmpty(remainingInterval)) {
            final Interval segmentInterval;
            if (remainingInterval.toDuration().isLongerThan(spec.segmentPeriod.toStandardDuration())) {
                segmentInterval = new Interval(spec.segmentPeriod, remainingInterval.getEnd());
            } else {
                segmentInterval = remainingInterval;
            }
            for (int i = 0; i < spec.numSegmentsPerShard; i++) {
                final ShardSpec shardSpec = new NumberedShardSpec(i, spec.numSegmentsPerShard);
                final DataSegment segment = new DataSegment(
                    DATA_SOURCE,
                    segmentInterval,
                    spec.version == null ? version : spec.version,
                    null,
                    ImmutableList.of(),
                    ImmutableList.of(),
                    shardSpec,
                    spec.lastCompactionState,
                    0,
                    spec.segmentSize);
                segments.add(segment);
            }
            remainingInterval = SegmentCompactionUtil.removeIntervalFromEnd(remainingInterval, segmentInterval);
        }
    }
    return VersionedIntervalTimeline.forSegments(segments);
}
Also used : DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) Partitions(org.apache.druid.timeline.Partitions) UserCompactionTaskDimensionsConfig(org.apache.druid.server.coordinator.UserCompactionTaskDimensionsConfig) IndexSpec(org.apache.druid.segment.IndexSpec) CompactionState(org.apache.druid.timeline.CompactionState) DataSourceCompactionConfig(org.apache.druid.server.coordinator.DataSourceCompactionConfig) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Map(java.util.Map) Assertions(org.assertj.core.api.Assertions) TypeReference(com.fasterxml.jackson.core.type.TypeReference) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) DateTimes(org.apache.druid.java.util.common.DateTimes) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) ClientCompactionTaskQueryTuningConfig(org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) Collectors(java.util.stream.Collectors) UserCompactionTaskTransformConfig(org.apache.druid.server.coordinator.UserCompactionTaskTransformConfig) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Iterables(com.google.common.collect.Iterables) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Intervals(org.apache.druid.java.util.common.Intervals) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) Period(org.joda.time.Period) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Granularities(org.apache.druid.java.util.common.granularity.Granularities) NullHandling(org.apache.druid.common.config.NullHandling) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections)
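For reference, a hypothetical call mirroring how the tests above use this helper. Each SegmentGenerateSpec takes a total interval, a per-segment period, and optionally a version and a CompactionState; the constructor shapes are taken from the calls shown earlier, and partitionsSpec and indexSpecMap are assumed to be built as in those tests.

    // Hypothetical usage only: a week of uncompacted segments plus one already-compacted day.
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2020-01-01/2020-01-08"), new Period("P7D")),
        new SegmentGenerateSpec(
            Intervals.of("2020-01-08/2020-01-09"),
            new Period("P1D"),
            // version = null, so the generated "now" version is used
            null,
            // partitionsSpec and indexSpecMap as built in the earlier examples
            new CompactionState(partitionsSpec, null, null, null, indexSpecMap, null)));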

Aggregations

DataSegment (org.apache.druid.timeline.DataSegment): 612
Test (org.junit.Test): 386
ArrayList (java.util.ArrayList): 161
Interval (org.joda.time.Interval): 158
File (java.io.File): 138
Map (java.util.Map): 110
List (java.util.List): 108
ImmutableList (com.google.common.collect.ImmutableList): 77
IOException (java.io.IOException): 77
HashMap (java.util.HashMap): 74
ImmutableMap (com.google.common.collect.ImmutableMap): 72
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 68
HashSet (java.util.HashSet): 58
TaskStatus (org.apache.druid.indexer.TaskStatus): 53
Collectors (java.util.stream.Collectors): 52
Set (java.util.Set): 50
CountDownLatch (java.util.concurrent.CountDownLatch): 50
ISE (org.apache.druid.java.util.common.ISE): 50
SegmentId (org.apache.druid.timeline.SegmentId): 47
LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 45