
Example 61 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentQueryGranularity.

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentQueryGranularity() {
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have
    // queryGranularity=DAY for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
    // queryGranularity=MINUTE for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
    // and queryGranularity=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (queryGranularity was not set during last compaction)
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("queryGranularity", "day"))),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("queryGranularity", "minute"))),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of())));
    // Auto compaction config sets queryGranularity=MINUTE
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(null, Granularities.MINUTE, null))),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // We should get interval 2017-10-03T00:00:00/2017-10-04T00:00:00 first and then interval 2017-10-01T00:00:00/2017-10-02T00:00:00
    // (newest first); the 2017-10-02 interval already matches the configured MINUTE granularity, so it is skipped.
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
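
A minimal standalone sketch of the comparison this test exercises is below. The method needsRecompaction and the map shape are hypothetical simplifications of what NewestSegmentFirstIterator does internally, not Druid's actual code: a compacted segment becomes eligible again when the queryGranularity recorded in its CompactionState is absent or differs from the configured one.

import java.util.Map;

public class GranularityCheckSketch {
    // Hypothetical simplification: compare the granularity recorded at the
    // last compaction against the granularity the current config asks for.
    static boolean needsRecompaction(Map<String, Object> lastGranularitySpec, String configuredQueryGranularity) {
        if (configuredQueryGranularity == null) {
            return false; // config does not constrain queryGranularity
        }
        Object last = lastGranularitySpec.get("queryGranularity");
        // absent (never set) or a different value both trigger recompaction
        return !configuredQueryGranularity.equalsIgnoreCase(String.valueOf(last));
    }

    public static void main(String[] args) {
        System.out.println(needsRecompaction(Map.of("queryGranularity", "day"), "minute"));    // true
        System.out.println(needsRecompaction(Map.of("queryGranularity", "minute"), "minute")); // false
        System.out.println(needsRecompaction(Map.of(), "minute"));                             // true: never set
    }
}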

Example 62 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, method testClearSegmentsToCompactWhenSkippingSegments.

@Test
public void testClearSegmentsToCompactWhenSkippingSegments() {
    final long inputSegmentSizeBytes = 800000;
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(Intervals.of("2017-12-03T00:00:00/2017-12-04T00:00:00"), new Period("P1D"), inputSegmentSizeBytes / 2 + 10, 1),
        // large segment
        new SegmentGenerateSpec(Intervals.of("2017-12-02T00:00:00/2017-12-03T00:00:00"), new Period("P1D"), inputSegmentSizeBytes + 10, 1),
        new SegmentGenerateSpec(Intervals.of("2017-12-01T00:00:00/2017-12-02T00:00:00"), new Period("P1D"), inputSegmentSizeBytes / 3 + 10, 2));
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(DATA_SOURCE, createCompactionConfig(inputSegmentSizeBytes, new Period("P0D"), null)),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    final List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-12-03/2017-12-04"), Partitions.ONLY_COMPLETE));
    expectedSegmentsToCompact.sort(Comparator.naturalOrder());
    final List<DataSegment> expectedSegmentsToCompact2 = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-12-01/2017-12-02"), Partitions.ONLY_COMPLETE));
    expectedSegmentsToCompact2.sort(Comparator.naturalOrder());
    Assertions.assertThat(iterator).toIterable().containsExactly(expectedSegmentsToCompact, expectedSegmentsToCompact2);
}
Also used: ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) Test(org.junit.Test)
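
The clearing behavior under test can be sketched in isolation. The sketch below is hypothetical (Seg and selectChunks are illustrative names, not Druid's): when a time chunk's total size exceeds inputSegmentSizeBytes, the segments already queued for that chunk are cleared rather than emitted as a partial group, and iteration moves on to the next, older chunk. That is why the oversized 2017-12-02 segment never appears in the output.

import java.util.ArrayList;
import java.util.List;

public class SkipOversizedChunksSketch {
    record Seg(long sizeBytes) {}

    // Hypothetical sketch: walk chunks newest-first, skipping any chunk whose
    // total size exceeds the limit and clearing what was queued for it.
    static List<List<Seg>> selectChunks(List<List<Seg>> newestFirstChunks, long inputSegmentSizeBytes) {
        List<List<Seg>> toCompact = new ArrayList<>();
        for (List<Seg> chunk : newestFirstChunks) {
            List<Seg> queued = new ArrayList<>();
            long total = 0;
            for (Seg seg : chunk) {
                queued.add(seg);
                total += seg.sizeBytes();
            }
            if (total > inputSegmentSizeBytes) {
                queued.clear(); // oversized chunk: drop what was queued, move on
            } else {
                toCompact.add(queued);
            }
        }
        return toCompact;
    }

    public static void main(String[] args) {
        long limit = 800_000;
        List<List<Seg>> chunks = List.of(
            List.of(new Seg(limit / 2 + 10)),                          // fits
            List.of(new Seg(limit + 10)),                              // too large, skipped
            List.of(new Seg(limit / 3 + 10), new Seg(limit / 3 + 10))); // fits
        System.out.println(selectChunks(chunks, limit).size()); // prints 2
    }
}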

Example 63 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, method testManySegmentsPerShard.

@Test
public void testManySegmentsPerShard() {
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(DATA_SOURCE, createCompactionConfig(800000, new Period("P1D"), null)),
        ImmutableMap.of(
            DATA_SOURCE,
            createTimeline(
                new SegmentGenerateSpec(Intervals.of("2017-12-04T01:00:00/2017-12-05T03:00:00"), new Period("PT1H"), 375, 80),
                new SegmentGenerateSpec(Intervals.of("2017-12-04T00:00:00/2017-12-04T01:00:00"), new Period("PT1H"), 200, 150),
                new SegmentGenerateSpec(Intervals.of("2017-12-03T18:00:00/2017-12-04T00:00:00"), new Period("PT6H"), 200000, 1),
                new SegmentGenerateSpec(Intervals.of("2017-12-03T11:00:00/2017-12-03T18:00:00"), new Period("PT1H"), 375, 80))),
        Collections.emptyMap());
    Interval lastInterval = null;
    while (iterator.hasNext()) {
        final List<DataSegment> segments = iterator.next();
        lastInterval = segments.get(0).getInterval();
        Interval prevInterval = null;
        for (DataSegment segment : segments) {
            if (prevInterval != null && !prevInterval.getStart().equals(segment.getInterval().getStart())) {
                Assert.assertEquals(prevInterval.getEnd(), segment.getInterval().getStart());
            }
            prevInterval = segment.getInterval();
        }
    }
    Assert.assertNotNull(lastInterval);
    Assert.assertEquals(Intervals.of("2017-12-03T11:00:00/2017-12-03T12:00:00"), lastInterval);
}
Also used: Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)
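
The while loop asserts a contiguity property on each returned group: consecutive segment intervals either share a start (multiple shards of the same hour chunk) or abut exactly, with the next start equal to the previous end. A minimal Joda-Time sketch of that property, with an illustrative class and method name:

import java.util.List;
import org.joda.time.Interval;

public class ContiguitySketch {
    // True when consecutive intervals are either shards of the same chunk
    // (same start) or abut exactly (next start == previous end).
    static boolean isContiguous(List<Interval> intervals) {
        Interval prev = null;
        for (Interval cur : intervals) {
            if (prev != null
                    && !prev.getStart().equals(cur.getStart())
                    && !prev.getEnd().equals(cur.getStart())) {
                return false; // a gap or overlap between chunks
            }
            prev = cur;
        }
        return true;
    }

    public static void main(String[] args) {
        System.out.println(isContiguous(List.of(
            Interval.parse("2017-12-03T11:00:00Z/2017-12-03T12:00:00Z"),
            Interval.parse("2017-12-03T11:00:00Z/2017-12-03T12:00:00Z"), // same chunk, another shard
            Interval.parse("2017-12-03T12:00:00Z/2017-12-03T13:00:00Z")  // abuts the previous chunk
        ))); // true
    }
}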

Example 64 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentRollup.

@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentRollup() {
    // Same indexSpec as what is set in the auto compaction config
    Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {
    });
    // Same partitionsSpec as what is set in the auto compaction config
    PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
    // Create segments that were compacted (CompactionState != null) and have
    // rollup=false for interval 2017-10-01T00:00:00/2017-10-02T00:00:00,
    // rollup=true for interval 2017-10-02T00:00:00/2017-10-03T00:00:00,
    // and rollup=null for interval 2017-10-03T00:00:00/2017-10-04T00:00:00 (rollup was not set during last compaction)
    final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
        new SegmentGenerateSpec(
            Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("rollup", "false"))),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of("rollup", "true"))),
        new SegmentGenerateSpec(
            Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"),
            new Period("P1D"),
            null,
            new CompactionState(partitionsSpec, null, null, null, indexSpec, ImmutableMap.of())));
    // Auto compaction config sets rollup=true
    final CompactionSegmentIterator iterator = policy.reset(
        ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), new UserCompactionTaskGranularityConfig(null, null, true))),
        ImmutableMap.of(DATA_SOURCE, timeline),
        Collections.emptyMap());
    // We should get interval 2017-10-03T00:00:00/2017-10-04T00:00:00 first and then interval 2017-10-01T00:00:00/2017-10-02T00:00:00
    // (newest first); the 2017-10-02 interval already has rollup=true matching the config, so it is skipped.
    Assert.assertTrue(iterator.hasNext());
    List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    Assert.assertTrue(iterator.hasNext());
    expectedSegmentsToCompact = new ArrayList<>(timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE));
    Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
    // No more
    Assert.assertFalse(iterator.hasNext());
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) ArrayList(java.util.ArrayList) Period(org.joda.time.Period) DataSegment(org.apache.druid.timeline.DataSegment) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) Test(org.junit.Test)
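
As with the queryGranularity test above, the decisive comparison can be sketched standalone. This is a hypothetical simplification, not NewestSegmentFirstIterator's actual code: the last compaction recorded rollup as a string value in the granularitySpec map, the auto compaction config supplies a Boolean, and an absent or different value makes the interval eligible again.

import java.util.Map;

public class RollupCheckSketch {
    // Hypothetical simplification: the stored rollup value (a string here)
    // is compared against the Boolean from the current config.
    static boolean needsRecompaction(Map<String, Object> lastGranularitySpec, Boolean configuredRollup) {
        if (configuredRollup == null) {
            return false; // config leaves rollup alone
        }
        Object last = lastGranularitySpec.get("rollup");
        return last == null || Boolean.parseBoolean(String.valueOf(last)) != configuredRollup;
    }

    public static void main(String[] args) {
        System.out.println(needsRecompaction(Map.of("rollup", "false"), true)); // true: differs
        System.out.println(needsRecompaction(Map.of("rollup", "true"), true));  // false: matches
        System.out.println(needsRecompaction(Map.of(), true));                  // true: never set
    }
}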

Example 65 with DataSegment

Use of org.apache.druid.timeline.DataSegment in project druid by druid-io.

From the class CompactSegmentsTest, method assertLastSegmentNotCompacted.

private void assertLastSegmentNotCompacted(CompactSegments compactSegments) {
    // Segments of the latest interval should not be compacted
    for (int i = 0; i < 3; i++) {
        final String dataSource = DATA_SOURCE_PREFIX + i;
        final Interval interval = Intervals.of("2017-01-09T12:00:00/2017-01-10");
        List<TimelineObjectHolder<String, DataSegment>> holders = dataSources.get(dataSource).lookup(interval);
        Assert.assertEquals(1, holders.size());
        for (TimelineObjectHolder<String, DataSegment> holder : holders) {
            List<PartitionChunk<DataSegment>> chunks = Lists.newArrayList(holder.getObject());
            Assert.assertEquals(4, chunks.size());
            for (PartitionChunk<DataSegment> chunk : chunks) {
                DataSegment segment = chunk.getObject();
                Assert.assertEquals(interval, segment.getInterval());
                Assert.assertEquals("version", segment.getVersion());
            }
        }
    }
    // Emulating realtime dataSource
    final String dataSource = DATA_SOURCE_PREFIX + 0;
    addMoreData(dataSource, 9);
    CoordinatorStats stats = doCompactSegments(compactSegments);
    Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
    addMoreData(dataSource, 10);
    stats = doCompactSegments(compactSegments);
    Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
}
Also used: CoordinatorStats(org.apache.druid.server.coordinator.CoordinatorStats) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval)
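
The "latest interval stays uncompacted" behavior comes from the skip offset auto compaction applies to the end of the timeline (skipOffsetFromLatest, P1D by default), which keeps actively-written intervals out of compaction. A hedged sketch of that trimming idea, with an illustrative function name:

import org.joda.time.DateTime;
import org.joda.time.Interval;
import org.joda.time.Period;

public class SkipOffsetSketch {
    // Illustrative sketch: trim the skip offset off the end of the total
    // interval, so segments in the most recent window are never offered.
    static Interval searchable(Interval totalInterval, Period skipOffsetFromLatest) {
        DateTime searchEnd = totalInterval.getEnd().minus(skipOffsetFromLatest);
        if (!searchEnd.isAfter(totalInterval.getStart())) {
            return null; // everything falls inside the skipped window
        }
        return new Interval(totalInterval.getStart(), searchEnd);
    }

    public static void main(String[] args) {
        Interval total = Interval.parse("2017-01-01/2017-01-10");
        System.out.println(searchable(total, Period.days(1))); // [2017-01-01, 2017-01-09)
    }
}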

Aggregations

DataSegment (org.apache.druid.timeline.DataSegment): 612
Test (org.junit.Test): 386
ArrayList (java.util.ArrayList): 161
Interval (org.joda.time.Interval): 158
File (java.io.File): 138
Map (java.util.Map): 110
List (java.util.List): 108
ImmutableList (com.google.common.collect.ImmutableList): 77
IOException (java.io.IOException): 77
HashMap (java.util.HashMap): 74
ImmutableMap (com.google.common.collect.ImmutableMap): 72
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 68
HashSet (java.util.HashSet): 58
TaskStatus (org.apache.druid.indexer.TaskStatus): 53
Collectors (java.util.stream.Collectors): 52
Set (java.util.Set): 50
CountDownLatch (java.util.concurrent.CountDownLatch): 50
ISE (org.apache.druid.java.util.common.ISE): 50
SegmentId (org.apache.druid.timeline.SegmentId): 47
LinearShardSpec (org.apache.druid.timeline.partition.LinearShardSpec): 45