Example 31 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

From the class CompactionTaskRunTest, method testCompactionWithNewMetricInMetricsSpec.

@Test
public void testCompactionWithNewMetricInMetricsSpec() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    // day segmentGranularity
    final CompactionTask compactionTask = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
        .metricsSpec(new AggregatorFactory[]{
            new CountAggregatorFactory("cnt"),
            new LongSumAggregatorFactory("val", "val")
        })
        .build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(1, segments.size());
    Assert.assertEquals(Intervals.of("2014-01-01/2014-01-02"), segments.get(0).getInterval());
    Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec());
    ObjectMapper mapper = new DefaultObjectMapper();
    Map<String, String> expectedCountMetric = new HashMap<>();
    expectedCountMetric.put("type", "count");
    expectedCountMetric.put("name", "cnt");
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    CompactionState expectedCompactionState = new CompactionState(
        new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedCountMetric, expectedLongSumMetric),
        getObjectMapper().readValue(
            getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()),
            Map.class
        ),
        mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
        mapper.readValue(
            mapper.writeValueAsString(
                new UniformGranularitySpec(
                    Granularities.DAY,
                    Granularities.MINUTE,
                    true,
                    ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00"))
                )
            ),
            Map.class
        )
    );
    Assert.assertEquals(expectedCompactionState, segments.get(0).getLastCompactionState());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) CompactionState(org.apache.druid.timeline.CompactionState) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Map(java.util.Map) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
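A side note on the expectedCountMetric and expectedLongSumMetric maps built above: Druid's CompactionState stores the metrics spec in its JSON-map form, so each map must match what the corresponding AggregatorFactory serializes to. A minimal, standalone sketch of that round trip (the class name is ours; the types are the ones imported above):

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Map;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class AggregatorAsMapSketch {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new DefaultObjectMapper();
        // The polymorphic "type" tag is emitted on serialization, so reading the
        // JSON back as a Map reproduces the map literal built in the test:
        // {type=longSum, name=val, fieldName=val, expression=null}
        Map<?, ?> asMap = mapper.readValue(
                mapper.writeValueAsString(new LongSumAggregatorFactory("val", "val")),
                Map.class
        );
        System.out.println(asMap);
    }
}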

Example 32 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

From the class CompactionTaskRunTest, method testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity.

@Test
public void testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask1 = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .granularitySpec(new ClientCompactionTaskGranularitySpec(null, null, null))
        .build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask1);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(3, segments.size());
    for (int i = 0; i < 3; i++) {
        Assert.assertEquals(
            Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1),
            segments.get(i).getInterval()
        );
        Assert.assertEquals(
            getDefaultCompactionState(
                Granularities.HOUR,
                Granularities.MINUTE,
                ImmutableList.of(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1))
            ),
            segments.get(i).getLastCompactionState()
        );
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
        }
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)
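One detail worth calling out from the loop above: Intervals.of is used with a String.format-style signature (a format string plus arguments) to build one interval per hour. A standalone sketch of just that call:

import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

public class IntervalsFormatSketch {
    public static void main(String[] args) {
        // Same as Intervals.of("2014-01-01T00:00:00/2014-01-01T01:00:00").
        Interval firstHour = Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", 0, 1);
        System.out.println(firstHour);
    }
}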

Example 33 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

From the class CompactionTaskParallelRunTest, method testRunCompactionWithNewMetricsShouldStoreInState.

@Test
public void testRunCompactionWithNewMetricsShouldStoreInState() throws Exception {
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .metricsSpec(new AggregatorFactory[]{
            new CountAggregatorFactory("cnt"),
            new LongSumAggregatorFactory("val", "val")
        })
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    Assert.assertEquals(3, compactedSegments.size());
    for (DataSegment segment : compactedSegments) {
        Assert.assertSame(
            lockGranularity == LockGranularity.TIME_CHUNK
                ? NumberedShardSpec.class
                : NumberedOverwriteShardSpec.class,
            segment.getShardSpec().getClass()
        );
        Map<String, String> expectedCountMetric = new HashMap<>();
        expectedCountMetric.put("type", "count");
        expectedCountMetric.put("name", "cnt");
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        CompactionState expectedState = new CompactionState(
            new DynamicPartitionsSpec(null, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedCountMetric, expectedLongSumMetric),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()),
                Map.class
            ),
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(
                        Granularities.HOUR,
                        Granularities.MINUTE,
                        true,
                        ImmutableList.of(segment.getInterval())
                    )
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used : HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) CompactionState(org.apache.druid.timeline.CompactionState) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)
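Note that this test obtains the IndexSpec map via indexSpec.asMap(objectMapper), whereas Example 31 did an explicit serialize-then-deserialize round trip. The two should yield the same map; a small sketch under that assumption (the class name is ours):

import com.fasterxml.jackson.databind.ObjectMapper;
import java.util.Map;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.segment.IndexSpec;

public class IndexSpecMapSketch {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new DefaultObjectMapper();
        IndexSpec indexSpec = new IndexSpec();
        // Explicit round trip, as in Example 31 ...
        Map<?, ?> viaRoundTrip = mapper.readValue(mapper.writeValueAsString(indexSpec), Map.class);
        // ... versus the convenience method used in this example.
        Map<String, Object> viaAsMap = indexSpec.asMap(mapper);
        System.out.println(viaRoundTrip.equals(viaAsMap)); // expected: true
    }
}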

Example 34 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

From the class CompactionTaskParallelRunTest, method testCompactionDoesNotDropSegmentsIfDropFlagNotSet.

@Test
public void testCompactionDoesNotDropSegmentsIfDropFlagNotSet() {
    runIndexTask(null, true);
    Collection<DataSegment> usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    Assert.assertEquals(3, usedSegments.size());
    for (DataSegment segment : usedSegments) {
        Assert.assertTrue(Granularities.HOUR.isAligned(segment.getInterval()));
    }
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.MINUTE, null, null))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    // None of the HOUR segments were dropped, because the MINUTE segments do not fully cover the 3-hour interval.
    Assert.assertEquals(6, usedSegments.size());
    int hourSegmentCount = 0;
    int minuteSegmentCount = 0;
    for (DataSegment segment : usedSegments) {
        if (Granularities.MINUTE.isAligned(segment.getInterval())) {
            minuteSegmentCount++;
        }
        if (Granularities.HOUR.isAligned(segment.getInterval())) {
            hourSegmentCount++;
        }
    }
    Assert.assertEquals(3, hourSegmentCount);
    Assert.assertEquals(3, minuteSegmentCount);
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Example 35 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in project druid by druid-io.

From the class CompactionTaskParallelRunTest, method testCompactionDropSegmentsOfInputIntervalIfDropFlagIsSet.

@Test
public void testCompactionDropSegmentsOfInputIntervalIfDropFlagIsSet() {
    runIndexTask(null, true);
    Collection<DataSegment> usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    Assert.assertEquals(3, usedSegments.size());
    for (DataSegment segment : usedSegments) {
        Assert.assertTrue(Granularities.HOUR.isAligned(segment.getInterval()));
    }
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        // The second argument to inputSpec sets the drop flag.
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null), true)
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.MINUTE, null, null))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    // All the HOUR segments were dropped even though the MINUTE segments do not fully cover the 3-hour interval.
    // In fact, the compacted output contains only 3 minutes of data within that 3-hour interval.
    Assert.assertEquals(3, usedSegments.size());
    for (DataSegment segment : usedSegments) {
        Assert.assertTrue(Granularities.MINUTE.isAligned(segment.getInterval()));
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)
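Examples 34 and 35 differ in exactly one call: Example 35 uses the two-argument inputSpec overload, whose boolean flag requests that segments in the input interval not covered by the compacted output be dropped. A small sketch contrasting the two calls (the class, method, and the INTERVAL_TO_INDEX stand-in value are ours; the Builder and spec types are the ones used in the tests above):

import org.apache.druid.indexing.common.task.CompactionIntervalSpec;
import org.apache.druid.indexing.common.task.CompactionTask;
import org.apache.druid.java.util.common.Intervals;
import org.joda.time.Interval;

public class DropExistingSketch {
    // Hypothetical stand-in for the tests' INTERVAL_TO_INDEX constant.
    private static final Interval INTERVAL_TO_INDEX = Intervals.of("2014-01-01/2014-01-02");

    static CompactionTask build(CompactionTask.Builder builder, boolean dropExisting) {
        return dropExisting
                // Example 35: uncovered segments in the input interval get dropped.
                ? builder.inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null), true).build()
                // Example 34: the one-argument overload leaves them in place.
                : builder.inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null)).build();
    }
}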

Aggregations

Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 40
Test (org.junit.Test): 40
DataSegment (org.apache.druid.timeline.DataSegment): 29
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 18
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 18
ImmutableList (com.google.common.collect.ImmutableList): 17
ArrayList (java.util.ArrayList): 17
List (java.util.List): 17
TaskStatus (org.apache.druid.indexer.TaskStatus): 15
AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest): 13
NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13
ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec): 12
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 12
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 12
HashMap (java.util.HashMap): 11
Map (java.util.Map): 11
CompactionState (org.apache.druid.timeline.CompactionState): 11
ImmutableMap (com.google.common.collect.ImmutableMap): 8
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 5
Interval (org.joda.time.Interval): 5