Search in sources :

Example 11 with Builder

use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

the class CompactionTaskRunTest method testCompactionWithFilterInTransformSpec.

/**
 * Runs a compaction over 2014-01-01/2014-01-02 with DAY segment granularity and a transform spec
 * holding a selector filter ("dim" = "a"). Verifies that the task succeeds, that a single segment
 * covering the whole day is produced, and that the segment's last compaction state equals the
 * expected state built below (which embeds the task's own transform spec).
 */
@Test
public void testCompactionWithFilterInTransformSpec() throws Exception {
    runIndexTask();

    final String dayInterval = "2014-01-01/2014-01-02";
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    // day segmentGranularity, plus a selector filter carried in the transform spec
    final CompactionTask compactionTask = builder
            .interval(Intervals.of(dayInterval))
            .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
            .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null)))
            .build();

    final Pair<TaskStatus, List<DataSegment>> taskResult = runTask(compactionTask);
    Assert.assertTrue(taskResult.lhs.isSuccess());

    final List<DataSegment> segments = taskResult.rhs;
    Assert.assertEquals(1, segments.size());

    final DataSegment compacted = segments.get(0);
    Assert.assertEquals(Intervals.of(dayInterval), compacted.getInterval());
    Assert.assertEquals(new NumberedShardSpec(0, 1), compacted.getShardSpec());

    final ObjectMapper mapper = new DefaultObjectMapper();

    // Map form of the expected longSum metric; "expression" is expected to be present with a null value.
    final Map<String, String> longSumMetric = new HashMap<>();
    longSumMetric.put("type", "longSum");
    longSumMetric.put("name", "val");
    longSumMetric.put("fieldName", "val");
    longSumMetric.put("expression", null);

    // The transform spec is round-tripped through getObjectMapper(), while the index spec and the
    // granularity spec are round-tripped through the local DefaultObjectMapper.
    final CompactionState expectedCompactionState = new CompactionState(
            new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(longSumMetric),
            getObjectMapper().readValue(
                    getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()),
                    Map.class),
            mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
            mapper.readValue(
                    mapper.writeValueAsString(
                            new UniformGranularitySpec(
                                    Granularities.DAY,
                                    Granularities.MINUTE,
                                    true,
                                    ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00")))),
                    Map.class));
    Assert.assertEquals(expectedCompactionState, compacted.getLastCompactionState());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) CompactionState(org.apache.druid.timeline.CompactionState) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Map(java.util.Map) HashMap(java.util.HashMap) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)

Example 12 with Builder

use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

the class CompactionTaskRunTest method testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingTrue.

/**
 * First compacts only the 2014-01-01T01:00:00/2014-01-01T02:00:00 hour with MINUTE segment
 * granularity, then compacts the whole day with a null segment granularity. Both input specs are
 * registered with {@code true} as the second {@code inputSpec} argument (the drop-existing case,
 * per the test name). The test returns early, without asserting anything, under segment locking.
 */
@Test
public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingTrue() throws Exception {
    // This test fails with segment lock because of the bug reported in https://github.com/apache/druid/issues/10911.
    if (lockGranularity == LockGranularity.SEGMENT) {
        return;
    }

    // The index task creates HOUR segments with intervals of
    // - 2014-01-01T00:00:00/2014-01-01T01:00:00
    // - 2014-01-01T01:00:00/2014-01-01T02:00:00
    // - 2014-01-01T02:00:00/2014-01-01T03:00:00
    runIndexTask();

    final Interval firstHour = Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00");
    final Interval middleHour = Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
    final Interval lastHour = Intervals.of("2014-01-01T02:00:00/2014-01-01T03:00:00");

    // Segments outside the compaction interval are expected to remain unchanged.
    final Set<DataSegment> expectedSegments = new HashSet<>();
    expectedSegments.addAll(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(lastHour),
                    Segments.ONLY_VISIBLE));
    expectedSegments.addAll(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(firstHour),
                    Segments.ONLY_VISIBLE));

    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask partialCompactionTask = builder
            .segmentGranularity(Granularities.MINUTE)
            .inputSpec(new CompactionIntervalSpec(middleHour, null), true)
            .build();
    final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = runTask(partialCompactionTask);
    Assert.assertTrue(partialCompactionResult.lhs.isSuccess());

    // The newly compacted segments are expected. The old segments of the compacted interval should be
    // dropped regardless of whether the new segments fully overshadow them or not. Hence, we do not
    // expect old segments of the 2014-01-01T01:00:00/2014-01-01T02:00:00 interval post-compaction.
    expectedSegments.addAll(partialCompactionResult.rhs);
    final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")),
                    Segments.ONLY_VISIBLE));
    Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);

    // Same builder instance is reused; only the segment granularity and the input spec are replaced.
    final CompactionTask fullCompactionTask = builder
            .segmentGranularity(null)
            .inputSpec(new CompactionIntervalSpec(Intervals.of("2014-01-01/2014-01-02"), null), true)
            .build();
    final Pair<TaskStatus, List<DataSegment>> fullCompactionResult = runTask(fullCompactionTask);
    Assert.assertTrue(fullCompactionResult.lhs.isSuccess());

    final List<DataSegment> segmentsAfterFullCompaction = new ArrayList<>(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(Intervals.of("2014-01-01/2014-01-02")),
                    Segments.ONLY_VISIBLE));
    segmentsAfterFullCompaction.sort(
            (left, right) -> Comparators.intervalsByStartThenEnd().compare(left.getInterval(), right.getInterval()));
    Assert.assertEquals(3, segmentsAfterFullCompaction.size());

    // A full compaction with a null segmentGranularity means the original segmentGranularity is preserved.
    // For the intervals 2014-01-01T00:00:00.000Z/2014-01-01T01:00:00.000Z and
    // 2014-01-01T02:00:00.000Z/2014-01-01T03:00:00.000Z, the original segmentGranularity is HOUR
    // from the initial ingestion.
    // For the interval 2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z, the original segmentGranularity
    // is MINUTE from the partial compaction done earlier.
    Assert.assertEquals(
            Intervals.of("2014-01-01T00:00:00.000Z/2014-01-01T01:00:00.000Z"),
            segmentsAfterFullCompaction.get(0).getInterval());
    Assert.assertEquals(
            Intervals.of("2014-01-01T01:00:00.000Z/2014-01-01T01:01:00.000Z"),
            segmentsAfterFullCompaction.get(1).getInterval());
    Assert.assertEquals(
            Intervals.of("2014-01-01T02:00:00.000Z/2014-01-01T03:00:00.000Z"),
            segmentsAfterFullCompaction.get(2).getInterval());
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) HashSet(java.util.HashSet) Test(org.junit.Test)

Example 13 with Builder

use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

the class CompactionTaskRunTest method testRunIndexAndCompactForSameSegmentAtTheSameTime.

/**
 * Submits an index task and a compaction task for the same data concurrently, coordinated by two
 * latches. Expects the index task to succeed with six segments (two partitions for each of the
 * first three hours of 2014-01-01) and the compaction task to finish with status FAILED.
 */
@Test
public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception {
    runIndexTask();

    // Ordering enforced by the latches: indexTask becomes ready first, then compactionTask
    // becomes ready, then indexTask runs.
    final CountDownLatch compactionTaskReadyLatch = new CountDownLatch(1);
    final CountDownLatch indexTaskStartLatch = new CountDownLatch(1);

    final Future<Pair<TaskStatus, List<DataSegment>>> indexFuture = exec.submit(
            () -> runIndexTask(compactionTaskReadyLatch, indexTaskStartLatch, false));

    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
            .interval(Intervals.of("2014-01-01T00:00:00/2014-01-02T03:00:00"))
            .build();

    final Future<Pair<TaskStatus, List<DataSegment>>> compactionFuture = exec.submit(() -> {
        compactionTaskReadyLatch.await();
        return runTask(compactionTask, indexTaskStartLatch, null);
    });

    final Pair<TaskStatus, List<DataSegment>> indexResult = indexFuture.get();
    Assert.assertTrue(indexResult.lhs.isSuccess());

    final List<DataSegment> segments = indexResult.rhs;
    Assert.assertEquals(6, segments.size());

    // The expected shard spec type depends on the lock granularity the test runs with.
    final boolean usesSegmentLock = lockGranularity == LockGranularity.SEGMENT;
    for (int i = 0; i < 6; i++) {
        final int hour = i / 2;
        final int partition = i % 2;
        final DataSegment segment = segments.get(i);

        Assert.assertEquals(
                Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", hour, hour + 1),
                segment.getInterval());
        if (usesSegmentLock) {
            Assert.assertEquals(
                    new NumberedOverwriteShardSpec(
                            PartitionIds.NON_ROOT_GEN_START_PARTITION_ID + partition,
                            0,
                            2,
                            (short) 1,
                            (short) 2),
                    segment.getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(partition, 2), segment.getShardSpec());
        }
    }

    final Pair<TaskStatus, List<DataSegment>> compactionResult = compactionFuture.get();
    Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode());
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) CountDownLatch(java.util.concurrent.CountDownLatch) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Pair(org.apache.druid.java.util.common.Pair) Test(org.junit.Test)

Example 14 with Builder

use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

the class CompactionTaskRunTest method testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingFalse.

/**
 * First compacts only the 2014-01-01T01:00:00/2014-01-01T02:00:00 hour with MINUTE segment
 * granularity, then compacts the whole day with a null segment granularity. Both input specs are
 * registered with {@code false} as the second {@code inputSpec} argument (the no-drop-existing
 * case, per the test name). The test returns early, without asserting anything, under segment locking.
 */
@Test
public void testPartialIntervalCompactWithFinerSegmentGranularityThenFullIntervalCompactWithDropExistingFalse() throws Exception {
    // This test fails with segment lock because of the bug reported in https://github.com/apache/druid/issues/10911.
    if (lockGranularity == LockGranularity.SEGMENT) {
        return;
    }

    runIndexTask();

    final Interval wholeDay = Intervals.of("2014-01-01/2014-01-02");

    // Snapshot of the visible segments before any compaction runs.
    final Set<DataSegment> expectedSegments = new HashSet<>(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(wholeDay),
                    Segments.ONLY_VISIBLE));

    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final Interval partialInterval = Intervals.of("2014-01-01T01:00:00/2014-01-01T02:00:00");
    final CompactionTask partialCompactionTask = builder
            .segmentGranularity(Granularities.MINUTE)
            .inputSpec(new CompactionIntervalSpec(partialInterval, null), false)
            .build();
    final Pair<TaskStatus, List<DataSegment>> partialCompactionResult = runTask(partialCompactionTask);
    Assert.assertTrue(partialCompactionResult.lhs.isSuccess());

    // All segments in the previous expectedSegments should still appear as they have larger segment granularity.
    expectedSegments.addAll(partialCompactionResult.rhs);
    final Set<DataSegment> segmentsAfterPartialCompaction = new HashSet<>(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(wholeDay),
                    Segments.ONLY_VISIBLE));
    Assert.assertEquals(expectedSegments, segmentsAfterPartialCompaction);

    // Same builder instance is reused; only the segment granularity and the input spec are replaced.
    final CompactionTask fullCompactionTask = builder
            .segmentGranularity(null)
            .inputSpec(new CompactionIntervalSpec(wholeDay, null), false)
            .build();
    final Pair<TaskStatus, List<DataSegment>> fullCompactionResult = runTask(fullCompactionTask);
    Assert.assertTrue(fullCompactionResult.lhs.isSuccess());

    final List<DataSegment> segmentsAfterFullCompaction = new ArrayList<>(
            getStorageCoordinator().retrieveUsedSegmentsForIntervals(
                    DATA_SOURCE,
                    Collections.singletonList(wholeDay),
                    Segments.ONLY_VISIBLE));
    segmentsAfterFullCompaction.sort(
            (left, right) -> Comparators.intervalsByStartThenEnd().compare(left.getInterval(), right.getInterval()));
    Assert.assertEquals(3, segmentsAfterFullCompaction.size());

    // After sorting by interval, the n-th segment is expected to cover hour n .. n+1 of 2014-01-01.
    int hour = 0;
    for (DataSegment segment : segmentsAfterFullCompaction) {
        Assert.assertEquals(
                Intervals.of(StringUtils.format("2014-01-01T%02d/2014-01-01T%02d", hour, hour + 1)),
                segment.getInterval());
        hour++;
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) Test(org.junit.Test)

Example 15 with Builder

use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

the class CompactionTaskTest method testSerdeWithDimensions.

/**
 * Builds a compaction task with an explicit three-dimension {@code DimensionsSpec}, serializes it
 * to bytes with {@code OBJECT_MAPPER}, reads it back, and expects the result to equal the original.
 *
 * @throws IOException if writing or reading the task with {@code OBJECT_MAPPER} fails
 */
@Test
public void testSerdeWithDimensions() throws IOException {
    final DimensionsSpec threeStringDimensions = new DimensionsSpec(
            ImmutableList.of(
                    new StringDimensionSchema("dim1"),
                    new StringDimensionSchema("dim2"),
                    new StringDimensionSchema("dim3")));

    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask task = builder
            .segments(SEGMENTS)
            .dimensionsSpec(threeStringDimensions)
            .tuningConfig(createTuningConfig())
            .context(ImmutableMap.of("testKey", "testVal"))
            .build();

    // Round trip: task -> JSON bytes -> task.
    final byte[] serialized = OBJECT_MAPPER.writeValueAsBytes(task);
    final CompactionTask deserialized = OBJECT_MAPPER.readValue(serialized, CompactionTask.class);
    assertEquals(task, deserialized);
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) StringDimensionSchema(org.apache.druid.data.input.impl.StringDimensionSchema) Test(org.junit.Test)

Aggregations

Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder)40 Test (org.junit.Test)40 DataSegment (org.apache.druid.timeline.DataSegment)29 HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec)18 NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec)18 ImmutableList (com.google.common.collect.ImmutableList)17 ArrayList (java.util.ArrayList)17 List (java.util.List)17 TaskStatus (org.apache.druid.indexer.TaskStatus)15 AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest)13 NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec)13 ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec)12 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)12 UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec)12 HashMap (java.util.HashMap)11 Map (java.util.Map)11 CompactionState (org.apache.druid.timeline.CompactionState)11 ImmutableMap (com.google.common.collect.ImmutableMap)8 DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec)5 Interval (org.joda.time.Interval)5