use of org.apache.druid.timeline.DataSegment in project druid by druid-io.
the class CompactSegmentsTest method testMakeStatsForDataSourceWithSkipped.
@Test
public void testMakeStatsForDataSourceWithSkipped() {
  // Only test and validate one datasource for simplicity.
  // This dataSource has three intervals skipped (3 intervals, 1200 bytes, 12 segments skipped by auto compaction).
  // Note that these segments are 10 bytes each in other tests; we increase them to 100 bytes each here
  // so that they will be skipped by the auto compaction.
  String dataSourceName = DATA_SOURCE_PREFIX + 1;
  List<DataSegment> segments = new ArrayList<>();
  for (int j : new int[]{0, 1, 2, 3, 7, 8}) {
    for (int k = 0; k < 4; k++) {
      DataSegment beforeNoon = createSegment(dataSourceName, j, true, k);
      DataSegment afterNoon = createSegment(dataSourceName, j, false, k);
      if (j == 3) {
        // Make both intervals on this day skipped (two skipped intervals back-to-back)
        beforeNoon = beforeNoon.withSize(100);
        afterNoon = afterNoon.withSize(100);
      }
      if (j == 1) {
        // Make one interval on this day skipped
        afterNoon = afterNoon.withSize(100);
      }
      segments.add(beforeNoon);
      segments.add(afterNoon);
    }
  }
  dataSources = DataSourcesSnapshot.fromUsedSegments(segments, ImmutableMap.of()).getUsedSegmentsTimelinesPerDataSource();
  final TestDruidLeaderClient leaderClient = new TestDruidLeaderClient(JSON_MAPPER);
  leaderClient.start();
  final HttpIndexingServiceClient indexingServiceClient = new HttpIndexingServiceClient(JSON_MAPPER, leaderClient);
  final CompactSegments compactSegments = new CompactSegments(COORDINATOR_CONFIG, JSON_MAPPER, indexingServiceClient);
  // Before any compaction, we do not have any snapshot of compactions
  Map<String, AutoCompactionSnapshot> autoCompactionSnapshots = compactSegments.getAutoCompactionSnapshot();
  Assert.assertEquals(0, autoCompactionSnapshots.size());
  // 3 intervals, 1200 bytes (each segment is 100 bytes), and 12 segments will be skipped by auto compaction
  for (int compactionRunCount = 0; compactionRunCount < 8; compactionRunCount++) {
    // Do a cycle of auto compaction which creates one compaction task
    final CoordinatorStats stats = doCompactSegments(compactSegments);
    Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
    verifySnapshot(
        compactSegments,
        AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
        dataSourceName,
        // Minus 120 bytes accounting for the three skipped intervals' original size (4 segments x 10 bytes each)
        TOTAL_BYTE_PER_DATASOURCE - 120 - 40 * (compactionRunCount + 1),
        40 * (compactionRunCount + 1),
        1200,
        TOTAL_INTERVAL_PER_DATASOURCE - 3 - (compactionRunCount + 1),
        compactionRunCount + 1,
        3,
        TOTAL_SEGMENT_PER_DATASOURCE - 12 - 4 * (compactionRunCount + 1),
        4 + 2 * compactionRunCount,
        12
    );
  }
  // Test that stats do not change (and are still correct) when auto compaction runs after everything is fully compacted
  final CoordinatorStats stats = doCompactSegments(compactSegments);
  Assert.assertEquals(0, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));
  verifySnapshot(
      compactSegments,
      AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING,
      dataSourceName,
      0,
      // Minus 120 bytes accounting for the three skipped intervals' original size (4 segments x 10 bytes each)
      TOTAL_BYTE_PER_DATASOURCE - 120,
      1200,
      0,
      TOTAL_INTERVAL_PER_DATASOURCE - 3,
      3,
      0,
      16,
      12
  );
}
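The test relies on a createSegment helper from the same test class, which is not shown on this page. Below is a minimal sketch of what such a helper could look like. DataSegment.builder() and NumberedShardSpec are real Druid APIs, but the anchor date, version string, and half-day interval convention here are illustrative assumptions inferred from the beforeNoon/afterNoon usage above, not the test's actual implementation.

// Hypothetical sketch of a createSegment helper (assumed, not Druid's actual test code).
// Assumes imports: org.apache.druid.java.util.common.DateTimes,
// org.apache.druid.timeline.partition.NumberedShardSpec, org.joda.time.DateTime, org.joda.time.Interval
private DataSegment createSegment(String dataSource, int day, boolean beforeNoon, int partition) {
  DateTime dayStart = DateTimes.of("2017-01-01").plusDays(day);  // assumed anchor date
  // Each day is split into a before-noon and an after-noon interval
  Interval interval = beforeNoon
      ? new Interval(dayStart, dayStart.plusHours(12))
      : new Interval(dayStart.plusHours(12), dayStart.plusDays(1));
  return DataSegment.builder()
                    .dataSource(dataSource)
                    .interval(interval)
                    .version("1")
                    .shardSpec(new NumberedShardSpec(partition, 0))
                    .size(10)  // 10 bytes by default; the test above bumps skipped segments to 100
                    .build();
}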
use of org.apache.druid.timeline.DataSegment in project druid by druid-io.
the class CompactSegmentsTest method addMoreData.
private void addMoreData(String dataSource, int day) {
  for (int i = 0; i < 2; i++) {
    DataSegment newSegment = createSegment(dataSource, day, true, i);
    dataSources.get(dataSource).add(
        newSegment.getInterval(),
        newSegment.getVersion(),
        newSegment.getShardSpec().createChunk(newSegment)
    );
    newSegment = createSegment(dataSource, day, false, i);
    dataSources.get(dataSource).add(
        newSegment.getInterval(),
        newSegment.getVersion(),
        newSegment.getShardSpec().createChunk(newSegment)
    );
  }
}
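This helper supports a pattern of the form: compact everything, append fresh segments for a new day, then check that the next auto compaction run schedules exactly one new task for them. A hedged usage sketch, composed only of names that already appear in the snippets above:

// After the timeline is fully compacted, add fresh segments for a new day...
addMoreData(dataSourceName, 9);
// ...and the next auto compaction cycle should create exactly one new task for them.
final CoordinatorStats stats = doCompactSegments(compactSegments);
Assert.assertEquals(1, stats.getGlobalStat(CompactSegments.COMPACTION_TASK_COUNT));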
use of org.apache.druid.timeline.DataSegment in project druid by druid-io.
the class NewestSegmentFirstPolicyTest method testIfSegmentsSkipOffsetWithConfiguredSegmentGranularityEqual.
@Test
public void testIfSegmentsSkipOffsetWithConfiguredSegmentGranularityEqual() {
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(Intervals.of("2017-11-30T23:00:00/2017-12-03T00:00:00"), new Period("P1D")),
      new SegmentGenerateSpec(Intervals.of("2017-10-14T00:00:00/2017-10-15T00:00:00"), new Period("P1D"))
  );
  final CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), new UserCompactionTaskGranularityConfig(Granularities.DAY, null, null))),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // We should only get segments from 2017-10-14 up to 2017-12-02: the P1D skip offset excludes
  // everything within one day of the latest segment end (2017-12-03)
  final List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-14T00:00:00/2017-12-02T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertTrue(iterator.hasNext());
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(Iterables.concat(ImmutableSet.copyOf(iterator))));
}
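The arithmetic behind the expected interval, sketched explicitly. This is a simplified restatement of what the policy does with skipOffsetFromLatest, not the actual NewestSegmentFirstIterator code:

// Latest segment end is 2017-12-03; subtracting the P1D skip offset gives 2017-12-02.
DateTime cutoff = DateTimes.of("2017-12-03").minus(new Period("P1D"));
// With DAY segment granularity the cutoff is already bucket-aligned, so everything
// before 2017-12-02 stays searchable: hence the expected 2017-10-14/2017-12-02 interval.
Assert.assertEquals(Granularities.DAY.bucketStart(cutoff), cutoff);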
use of org.apache.druid.timeline.DataSegment in project druid by druid-io.
the class NewestSegmentFirstPolicyTest method testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentDimensions.
@Test
public void testIteratorReturnsSegmentsAsSegmentsWasCompactedAndHaveDifferentDimensions() {
  // Same indexSpec as what is set in the auto compaction config
  Map<String, Object> indexSpec = mapper.convertValue(new IndexSpec(), new TypeReference<Map<String, Object>>() {});
  // Same partitionsSpec as what is set in the auto compaction config
  PartitionsSpec partitionsSpec = NewestSegmentFirstIterator.findPartitionsSpecFromConfig(ClientCompactionTaskQueryTuningConfig.from(null, null));
  // Create segments that were compacted (CompactionState != null) and have
  // dimensions=["foo", "bar"] for interval 2017-10-01/2017-10-02,
  // dimensions=["foo"] for interval 2017-10-02/2017-10-03,
  // dimensions=[] (empty) for interval 2017-10-03/2017-10-04 (dimensions was not set during last compaction),
  // and dimensionsSpec=null for interval 2017-10-04/2017-10-05 (dimensionsSpec was not set during last compaction)
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("bar", "foo"))), null, null, indexSpec, null)),
      new SegmentGenerateSpec(Intervals.of("2017-10-02T00:00:00/2017-10-03T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))), null, null, indexSpec, null)),
      new SegmentGenerateSpec(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, DimensionsSpec.EMPTY, null, null, indexSpec, null)),
      new SegmentGenerateSpec(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), new Period("P1D"), null,
          new CompactionState(partitionsSpec, null, null, null, indexSpec, null))
  );
  // Auto compaction config sets dimensions=["foo"]
  CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), null, new UserCompactionTaskDimensionsConfig(DimensionsSpec.getDefaultSchemas(ImmutableList.of("foo"))), null, null)),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  // Every interval whose last compaction used something other than dimensions=["foo"] is returned,
  // newest first: 2017-10-04/2017-10-05, then 2017-10-03/2017-10-04, then 2017-10-01/2017-10-02.
  // 2017-10-02/2017-10-03 is skipped because it was already compacted with ["foo"].
  Assert.assertTrue(iterator.hasNext());
  List<DataSegment> expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-04T00:00:00/2017-10-05T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-03T00:00:00/2017-10-04T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  Assert.assertTrue(iterator.hasNext());
  expectedSegmentsToCompact = new ArrayList<>(
      timeline.findNonOvershadowedObjectsInInterval(Intervals.of("2017-10-01T00:00:00/2017-10-02T00:00:00"), Partitions.ONLY_COMPLETE)
  );
  Assert.assertEquals(ImmutableSet.copyOf(expectedSegmentsToCompact), ImmutableSet.copyOf(iterator.next()));
  // No more
  Assert.assertFalse(iterator.hasNext());
  // Auto compaction config sets dimensions=null, i.e. no opinion, so nothing needs recompaction
  iterator = policy.reset(
      ImmutableMap.of(DATA_SOURCE, createCompactionConfig(130000, new Period("P0D"), null, new UserCompactionTaskDimensionsConfig(null), null, null)),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  Assert.assertFalse(iterator.hasNext());
}
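Conceptually, the policy asks for each compacted interval whether the dimensions recorded in its CompactionState match the dimensions in the auto compaction config, and re-queues the interval when they differ. Below is a simplified sketch of that decision as a hypothetical needsRecompaction helper; the real comparison lives inside NewestSegmentFirstIterator and covers more fields (partitionsSpec, indexSpec, granularity) than just dimensions.

// Hypothetical helper illustrating the dimensions check exercised by this test.
boolean needsRecompaction(DimensionsSpec configured, CompactionState lastCompactionState) {
  if (configured == null) {
    return false;  // a null dimensions config means "no opinion", so nothing is re-queued
  }
  DimensionsSpec last = lastCompactionState == null ? null : lastCompactionState.getDimensionsSpec();
  // ["bar", "foo"], [] (empty), and null all differ from ["foo"], so those intervals are returned;
  // the interval last compacted with ["foo"] matches the config and is skipped.
  return last == null || !configured.getDimensions().equals(last.getDimensions());
}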
use of org.apache.druid.timeline.DataSegment in project druid by druid-io.
the class NewestSegmentFirstPolicyTest method testIfFirstSegmentIsInSkipOffset.
@Test
public void testIfFirstSegmentIsInSkipOffset() {
  final VersionedIntervalTimeline<String, DataSegment> timeline = createTimeline(
      new SegmentGenerateSpec(Intervals.of("2017-12-02T14:00:00/2017-12-03T00:00:00"), new Period("PT5H"), 40000, 1)
  );
  final CompactionSegmentIterator iterator = policy.reset(
      ImmutableMap.of(DATA_SOURCE, createCompactionConfig(40000, new Period("P1D"), null)),
      ImmutableMap.of(DATA_SOURCE, timeline),
      Collections.emptyMap()
  );
  Assert.assertFalse(iterator.hasNext());
}
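Why the iterator is empty here: the whole timeline (2017-12-02T14:00/2017-12-03) falls inside the P1D skip offset measured back from the latest segment end, so the searchable interval is empty. The assertion below just restates that arithmetic:

// Cutoff is 2017-12-03 minus P1D = 2017-12-02T00:00, which is before the first
// segment start (2017-12-02T14:00), so no segment is old enough to compact.
Assert.assertTrue(
    DateTimes.of("2017-12-03").minus(new Period("P1D")).isBefore(DateTimes.of("2017-12-02T14:00:00"))
);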