Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.
From the class CompactionTaskRunTest, method testCompactionWithFilterInTransformSpec.
@Test
public void testCompactionWithFilterInTransformSpec() throws Exception
{
  runIndexTask();
  final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
  // DAY segmentGranularity
  final CompactionTask compactionTask = builder
      .interval(Intervals.of("2014-01-01/2014-01-02"))
      .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
      .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null)))
      .build();
  Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
  Assert.assertTrue(resultPair.lhs.isSuccess());
  List<DataSegment> segments = resultPair.rhs;
  Assert.assertEquals(1, segments.size());
  Assert.assertEquals(Intervals.of("2014-01-01/2014-01-02"), segments.get(0).getInterval());
  Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec());
  ObjectMapper mapper = new DefaultObjectMapper();
  Map<String, String> expectedLongSumMetric = new HashMap<>();
  expectedLongSumMetric.put("type", "longSum");
  expectedLongSumMetric.put("name", "val");
  expectedLongSumMetric.put("fieldName", "val");
  expectedLongSumMetric.put("expression", null);
  CompactionState expectedCompactionState = new CompactionState(
      new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
      new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
      ImmutableList.of(expectedLongSumMetric),
      getObjectMapper().readValue(getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()), Map.class),
      mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
      mapper.readValue(
          mapper.writeValueAsString(new UniformGranularitySpec(
              Granularities.DAY, Granularities.MINUTE, true,
              ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00")))),
          Map.class)
  );
  Assert.assertEquals(expectedCompactionState, segments.get(0).getLastCompactionState());
}
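For reference, the transformSpec filter above is stored in the segment's lastCompactionState in its JSON form, which is why the test compares serialized maps. A minimal sketch of what that serialization looks like, assuming Jackson serde through DefaultObjectMapper and the standard annotations on SelectorDimFilter (the exact field set and ordering are not guaranteed across Druid versions):

ObjectMapper sketchMapper = new DefaultObjectMapper();
// Serialize the same selector filter used in the compaction task above.
String filterJson = sketchMapper.writeValueAsString(new SelectorDimFilter("dim", "a", null));
// Expected to look roughly like {"type":"selector","dimension":"dim","value":"a"},
// possibly with additional null-valued fields depending on the Druid version.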
Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.
From the class CompactionTaskRunTest, method testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingTrue.
@Test
public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingTrue() throws Exception
{
  // This test fails with segment lock because of the bug reported in https://github.com/apache/druid/issues/10911.
  if (lockGranularity == LockGranularity.SEGMENT) {
    return;
  }
  // This creates HOUR segments with intervals of
  // - 2014-01-01T00:00:00/2014-01-01T01:00:00
  // - 2014-01-01T01:00:00/2014-01-01T02:00:00
  // - 2014-01-01T02:00:00/2014-01-01T03:00:00
  runIndexTask();
  final Interval compactionPartialInterval = Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
  // Segments outside the compaction interval are expected to remain unchanged.
  final Set<DataSegment> expectedSegments = new HashSet<>();
  expectedSegments.addAll(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00")), Segments.ONLY_VISIBLE));
  expectedSegments.addAll(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00")), Segments.ONLY_VISIBLE));
  final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
  final CompactionTask partialCompactionTask = builder
      .segmentGranularity(Granularities.MINUTE)
      .inputSpec(new CompactionIntervalSpec(compactionPartialInterval, null), true)
      .build();
  final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = runTask(partialCompactionTask);
  Assert.assertTrue(partialCompactionResult.lhs.isSuccess());
  // The newly compacted segments are expected. However, the old segments of the compacted interval should be
  // dropped regardless of whether the new segments fully overshadow them. Hence, we do not expect old segments
  // of the 2014-01-01T01:00:00/2014-01-01T02:00:00 interval post-compaction.
  expectedSegments.addAll(partialCompactionResult.rhs);
  final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
  Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);
  final CompactionTask fullCompactionTask = builder
      .segmentGranularity(null)
      .inputSpec(new CompactionIntervalSpec(Intervals.of("2014-01-01/2014-01-02"), null), true)
      .build();
  final Pair<TaskStatus, List<DataSegment>> fullCompactionResult = runTask(fullCompactionTask);
  Assert.assertTrue(fullCompactionResult.lhs.isSuccess());
  final List<DataSegment> segmentsAfterFullCompaction = new ArrayList<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
  segmentsAfterFullCompaction.sort(
      (s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getInterval(), s2.getInterval()));
  Assert.assertEquals(3, segmentsAfterFullCompaction.size());
  // Full compaction with a null segmentGranularity means the original segmentGranularity is preserved.
  // For the intervals 2014-01-01T00:00:00.000Z/2014-01-01T01:00:00.000Z and 2014-01-01T02:00:00.000Z/2014-01-01T03:00:00.000Z,
  // the original segmentGranularity is HOUR from the initial ingestion.
  // For the interval 2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z, the original segmentGranularity is
  // MINUTE from the partial compaction done earlier (see the bucket sketch after this method).
  Assert.assertEquals(Intervals.of("2014-01-01T00:00:00.000Z/2014-01-01T01:00:00.000Z"), segmentsAfterFullCompaction.get(0).getInterval());
  Assert.assertEquals(Intervals.of("2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z"), segmentsAfterFullCompaction.get(1).getInterval());
  Assert.assertEquals(Intervals.of("2014-01-01T02:00:00.000Z/2014-01-01T03:00:00.000Z"), segmentsAfterFullCompaction.get(2).getInterval());
}
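Why the middle segment covers only one minute: the partial compaction above rewrote that hour with MINUTE segmentGranularity, and a full compaction with a null segmentGranularity keeps each input segment's existing granularity. A small sketch of the bucket arithmetic, illustrative only and assuming the standard Granularity.bucket and DateTimes helpers:

// The MINUTE bucket containing the start of the partially compacted hour.
Interval minuteBucket = Granularities.MINUTE.bucket(DateTimes.of("2014-01-01T01:00:00"));
// minuteBucket is 2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z, which matches the second
// interval asserted above; that only one MINUTE segment appears suggests the test data for this
// hour falls entirely within its first minute.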
Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.
From the class CompactionTaskRunTest, method testRunIndexAndCompactForSameSegmentAtTheSameTime.
@Test
public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception
{
  runIndexTask();
  // Make sure that the indexTask becomes ready first, then the compactionTask becomes ready, then the indexTask runs.
  final CountDownLatch compactionTaskReadyLatch = new CountDownLatch(1);
  final CountDownLatch indexTaskStartLatch = new CountDownLatch(1);
  final Future<Pair<TaskStatus, List<DataSegment>>> indexFuture = exec.submit(
      () -> runIndexTask(compactionTaskReadyLatch, indexTaskStartLatch, false)
  );
  final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
  final CompactionTask compactionTask = builder
      .interval(Intervals.of("2014-01-01T00:00:00/2014-01-02T03:00:00"))
      .build();
  final Future<Pair<TaskStatus, List<DataSegment>>> compactionFuture = exec.submit(() -> {
    compactionTaskReadyLatch.await();
    return runTask(compactionTask, indexTaskStartLatch, null);
  });
  Assert.assertTrue(indexFuture.get().lhs.isSuccess());
  List<DataSegment> segments = indexFuture.get().rhs;
  Assert.assertEquals(6, segments.size());
  for (int i = 0; i < 6; i++) {
    Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i / 2, i / 2 + 1), segments.get(i).getInterval());
    if (lockGranularity == LockGranularity.SEGMENT) {
      Assert.assertEquals(
          new NumberedOverwriteShardSpec(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID + i % 2, 0, 2, (short) 1, (short) 2),
          segments.get(i).getShardSpec()
      );
    } else {
      Assert.assertEquals(new NumberedShardSpec(i % 2, 2), segments.get(i).getShardSpec());
    }
  }
  final Pair<TaskStatus, List<DataSegment>> compactionResult = compactionFuture.get();
  Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode());
}
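The two latches above implement a simple ordering handshake between the index task and the compaction task. A stripped-down sketch of the same pattern using only standard java.util.concurrent types (the names here are illustrative, not from the test):

ExecutorService pool = Executors.newFixedThreadPool(2);
CountDownLatch firstReady = new CountDownLatch(1);
Future<?> first = pool.submit(() -> {
  firstReady.countDown();   // signal that this task has reached its "ready" point
  // ... work that must start first ...
});
Future<?> second = pool.submit(() -> {
  firstReady.await();       // block until the first task has signalled readiness
  // ... work that must only start afterwards ...
  return null;              // returning a value makes this lambda a Callable, so await() may throw
});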
Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.
From the class CompactionTaskRunTest, method testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingFalse.
@Test
public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingFalse() throws Exception
{
  // This test fails with segment lock because of the bug reported in https://github.com/apache/druid/issues/10911.
  if (lockGranularity == LockGranularity.SEGMENT) {
    return;
  }
  runIndexTask();
  final Set<DataSegment> expectedSegments = new HashSet<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
  final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
  final Interval partialInterval = Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
  final CompactionTask partialCompactionTask = builder
      .segmentGranularity(Granularities.MINUTE)
      .inputSpec(new CompactionIntervalSpec(partialInterval, null), false)
      .build();
  final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = runTask(partialCompactionTask);
  Assert.assertTrue(partialCompactionResult.lhs.isSuccess());
  // All segments in the previous expectedSegments should still appear, as they have a larger segment granularity.
  expectedSegments.addAll(partialCompactionResult.rhs);
  final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
  Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);
  final CompactionTask fullCompactionTask = builder
      .segmentGranularity(null)
      .inputSpec(new CompactionIntervalSpec(Intervals.of("2014-01-01/2014-01-02"), null), false)
      .build();
  final Pair<TaskStatus, List<DataSegment>> fullCompactionResult = runTask(fullCompactionTask);
  Assert.assertTrue(fullCompactionResult.lhs.isSuccess());
  final List<DataSegment> segmentsAfterFullCompaction = new ArrayList<>(getStorageCoordinator().retrieveUsedSegmentsForIntervals(
      DATA_SOURCE, Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")), Segments.ONLY_VISIBLE));
  segmentsAfterFullCompaction.sort(
      (s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getInterval(), s2.getInterval()));
  Assert.assertEquals(3, segmentsAfterFullCompaction.size());
  for (int i = 0; i < segmentsAfterFullCompaction.size(); i++) {
    Assert.assertEquals(
        Intervals.of(StringUtils.format("2014-01-01T%02d/2014-01-01T%02d", i, i + 1)),
        segmentsAfterFullCompaction.get(i).getInterval()
    );
  }
}
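The sort used in the two compaction tests above orders segments by interval start and then by interval end via Comparators.intervalsByStartThenEnd(). An equivalent comparator written with plain JDK utilities would look roughly like this (a sketch, not the test's code; it relies only on DataSegment.getInterval() and Joda-Time's comparable DateTime):

segmentsAfterFullCompaction.sort(
    Comparator.comparing((DataSegment s) -> s.getInterval().getStart())
              .thenComparing(s -> s.getInterval().getEnd())
);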
Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.
From the class CompactionTaskTest, method testSerdeWithDimensions.
@Test
public void testSerdeWithDimensions() throws IOException
{
  final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
  final CompactionTask task = builder
      .segments(SEGMENTS)
      .dimensionsSpec(new DimensionsSpec(ImmutableList.of(
          new StringDimensionSchema("dim1"), new StringDimensionSchema("dim2"), new StringDimensionSchema("dim3"))))
      .tuningConfig(createTuningConfig())
      .context(ImmutableMap.of("testKey", "testVal"))
      .build();
  final byte[] bytes = OBJECT_MAPPER.writeValueAsBytes(task);
  final CompactionTask fromJson = OBJECT_MAPPER.readValue(bytes, CompactionTask.class);
  assertEquals(task, fromJson);
}
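The round trip above exercises serde for the whole CompactionTask. A smaller, self-contained sketch of the same write-then-read pattern over just the DimensionsSpec used in the builder call, assuming the usual Jackson annotations on DimensionsSpec and StringDimensionSchema handle the serde:

ObjectMapper mapper = new DefaultObjectMapper();
DimensionsSpec spec = new DimensionsSpec(ImmutableList.of(
    new StringDimensionSchema("dim1"), new StringDimensionSchema("dim2"), new StringDimensionSchema("dim3")));
// Write the spec to JSON and read it back; a correct serde round trip preserves equality.
DimensionsSpec fromJsonSpec = mapper.readValue(mapper.writeValueAsBytes(spec), DimensionsSpec.class);
Assert.assertEquals(spec, fromJsonSpec);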