
Example 16 with CompactionState

Use of org.apache.druid.timeline.CompactionState in project druid by druid-io, from the class CompactionTaskParallelRunTest, method testRunCompactionWithNewMetricsShouldStoreInState. The test verifies that a parallel compaction task configured with a new metricsSpec (a count and a longSum aggregator) records those metrics in each compacted segment's last CompactionState.

@Test
public void testRunCompactionWithNewMetricsShouldStoreInState() throws Exception {
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .metricsSpec(new AggregatorFactory[] {
            new CountAggregatorFactory("cnt"),
            new LongSumAggregatorFactory("val", "val")
        })
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    Assert.assertEquals(3, compactedSegments.size());
    for (DataSegment segment : compactedSegments) {
        Assert.assertSame(lockGranularity == LockGranularity.TIME_CHUNK ? NumberedShardSpec.class : NumberedOverwriteShardSpec.class, segment.getShardSpec().getClass());
        Map<String, String> expectedCountMetric = new HashMap<>();
        expectedCountMetric.put("type", "count");
        expectedCountMetric.put("name", "cnt");
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        CompactionState expectedState = new CompactionState(
            new DynamicPartitionsSpec(null, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedCountMetric, expectedLongSumMetric),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()),
                Map.class
            ),
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(
                        Granularities.HOUR,
                        Granularities.MINUTE,
                        true,
                        ImmutableList.of(segment.getInterval())
                    )
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
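
The hand-built maps above mirror how CompactionState stores metrics: each AggregatorFactory is kept in its JSON map form rather than as a typed object. A minimal sketch of that round trip, assuming Druid's processing module is on the classpath (the class name MetricMapSketch is only for illustration):

import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class MetricMapSketch {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new DefaultObjectMapper();
        LongSumAggregatorFactory factory = new LongSumAggregatorFactory("val", "val");
        // Serialize the aggregator and read it back as a generic Map, the shape kept in
        // CompactionState; the result should match expectedLongSumMetric in the test above.
        Map<?, ?> asMap = mapper.readValue(mapper.writeValueAsString(factory), Map.class);
        System.out.println(asMap); // e.g. {type=longSum, name=val, fieldName=val, expression=null}
    }
}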

Example 17 with CompactionState

Use of org.apache.druid.timeline.CompactionState in project druid by druid-io, from the class CompactionTaskParallelRunTest, method testRunCompactionWithFilterShouldStoreInState. The test verifies that a compaction task configured with a transformSpec filter (a selector filter matching dim = "a") stores that transform spec in each compacted segment's last CompactionState.

@Test
public void testRunCompactionWithFilterShouldStoreInState() throws Exception {
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null)))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    Assert.assertEquals(3, compactedSegments.size());
    for (DataSegment segment : compactedSegments) {
        Assert.assertSame(lockGranularity == LockGranularity.TIME_CHUNK ? NumberedShardSpec.class : NumberedOverwriteShardSpec.class, segment.getShardSpec().getClass());
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        CompactionState expectedState = new CompactionState(
            new DynamicPartitionsSpec(null, Long.MAX_VALUE),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedLongSumMetric),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()),
                Map.class
            ),
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(
                        Granularities.HOUR,
                        Granularities.MINUTE,
                        true,
                        ImmutableList.of(segment.getInterval())
                    )
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), SelectorDimFilter (org.apache.druid.query.filter.SelectorDimFilter), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
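
The same map-form storage applies to the transform spec that the test checks via the writeValueAsString/readValue round trip. A hedged sketch of that serialization in isolation, under the same classpath assumption (TransformSpecSketch is an illustrative name):

import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.query.filter.SelectorDimFilter;

public class TransformSpecSketch {
    public static void main(String[] args) throws Exception {
        ObjectMapper mapper = new DefaultObjectMapper();
        ClientCompactionTaskTransformSpec spec =
            new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null));
        // The stored form is a plain map keyed by "filter", with the filter's JSON fields inside.
        Map<?, ?> asMap = mapper.readValue(mapper.writeValueAsString(spec), Map.class);
        System.out.println(asMap); // roughly {filter={type=selector, dimension=dim, value=a}}
    }
}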

Example 18 with CompactionState

Use of org.apache.druid.timeline.CompactionState in project druid by druid-io, from the class CompactionTaskParallelRunTest, method testRunParallelWithMultiDimensionRangePartitioning. The test runs a parallel compaction with multi-dimension range partitioning and verifies both the resulting DimensionRangeShardSpec and the CompactionState stored on each segment.

@Test
public void testRunParallelWithMultiDimensionRangePartitioning() throws Exception {
    // Range partitioning is not supported with segment lock yet
    Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(newTuningConfig(new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false), 2, true))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    for (DataSegment segment : compactedSegments) {
        // Expect compaction state to exist as store compaction state by default
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        Assert.assertSame(DimensionRangeShardSpec.class, segment.getShardSpec().getClass());
        CompactionState expectedState = new CompactionState(
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedLongSumMetric),
            null,
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(
                        Granularities.HOUR,
                        Granularities.MINUTE,
                        true,
                        ImmutableList.of(segment.getInterval())
                    )
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)

Example 19 with CompactionState

Use of org.apache.druid.timeline.CompactionState in project druid by druid-io, from the class CompactionTaskParallelRunTest, method testRunParallelWithRangePartitioning. The test is the single-dimension counterpart of Example 18: it compacts with range partitioning on one dimension and verifies the resulting SingleDimensionShardSpec and stored CompactionState. A comparison sketch of the two partitions specs follows Example 19's import list below.

@Test
public void testRunParallelWithRangePartitioning() throws Exception {
    // Range partitioning is not supported with segment lock yet
    Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(newTuningConfig(new SingleDimensionPartitionsSpec(7, null, "dim", false), 2, true))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    for (DataSegment segment : compactedSegments) {
        // Expect compaction state to exist as store compaction state by default
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        Assert.assertSame(SingleDimensionShardSpec.class, segment.getShardSpec().getClass());
        CompactionState expectedState = new CompactionState(
            new SingleDimensionPartitionsSpec(7, null, "dim", false),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedLongSumMetric),
            null,
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(
                        Granularities.HOUR,
                        Granularities.MINUTE,
                        true,
                        ImmutableList.of(segment.getInterval())
                    )
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
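
Examples 18 and 19 differ only in the partitions spec handed to the tuning config. A small side-by-side sketch, under the same classpath assumption (SingleDimensionPartitionsSpec is effectively the one-dimension case of DimensionRangePartitionsSpec; RangeSpecSketch is an illustrative name):

import java.util.Arrays;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;
import org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec;

public class RangeSpecSketch {
    public static void main(String[] args) {
        // Multi-dimension range partitioning, as in Example 18.
        DimensionRangePartitionsSpec multi =
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false);
        // Single-dimension range partitioning, as in Example 19.
        SingleDimensionPartitionsSpec single =
            new SingleDimensionPartitionsSpec(7, null, "dim", false);
        System.out.println(multi.getPartitionDimensions());  // [dim1, dim2]
        System.out.println(single.getPartitionDimensions()); // [dim]
    }
}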

Example 20 with CompactionState

Use of org.apache.druid.timeline.CompactionState in project druid by druid-io, from the class NewestSegmentFirstIterator, method needsCompaction. The method decides whether a set of candidate segments needs (re)compaction by comparing each part of their last CompactionState (partitions spec, index spec, granularity, rollup, dimensions, transform filter, and metrics) against the current auto-compaction config.

private boolean needsCompaction(DataSourceCompactionConfig config, SegmentsToCompact candidates) {
    Preconditions.checkState(!candidates.isEmpty(), "Empty candidates");
    final ClientCompactionTaskQueryTuningConfig tuningConfig = ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment());
    final PartitionsSpec partitionsSpecFromConfig = findPartitionsSpecFromConfig(tuningConfig);
    final CompactionState lastCompactionState = candidates.segments.get(0).getLastCompactionState();
    if (lastCompactionState == null) {
        log.info("Candidate segment[%s] is not compacted yet. Needs compaction.", candidates.segments.get(0).getId());
        return true;
    }
    final boolean allCandidatesHaveSameLastCompactionState = candidates.segments
        .stream()
        .allMatch(segment -> lastCompactionState.equals(segment.getLastCompactionState()));
    if (!allCandidatesHaveSameLastCompactionState) {
        log.info("[%s] Candidate segments were compacted with different partitions spec. Needs compaction.", candidates.segments.size());
        log.debugSegments(candidates.segments, "Candidate segments compacted with different partiton spec");
        return true;
    }
    final PartitionsSpec segmentPartitionsSpec = lastCompactionState.getPartitionsSpec();
    final IndexSpec segmentIndexSpec = objectMapper.convertValue(lastCompactionState.getIndexSpec(), IndexSpec.class);
    final IndexSpec configuredIndexSpec;
    if (tuningConfig.getIndexSpec() == null) {
        configuredIndexSpec = new IndexSpec();
    } else {
        configuredIndexSpec = tuningConfig.getIndexSpec();
    }
    if (!Objects.equals(partitionsSpecFromConfig, segmentPartitionsSpec)) {
        log.info("Configured partitionsSpec[%s] is differenet from " + "the partitionsSpec[%s] of segments. Needs compaction.", partitionsSpecFromConfig, segmentPartitionsSpec);
        return true;
    }
    // segmentIndexSpec cannot be null.
    if (!segmentIndexSpec.equals(configuredIndexSpec)) {
        log.info("Configured indexSpec[%s] is different from the one[%s] of segments. Needs compaction", configuredIndexSpec, segmentIndexSpec);
        return true;
    }
    if (config.getGranularitySpec() != null) {
        final ClientCompactionTaskGranularitySpec existingGranularitySpec =
            lastCompactionState.getGranularitySpec() != null
                ? objectMapper.convertValue(lastCompactionState.getGranularitySpec(), ClientCompactionTaskGranularitySpec.class)
                : null;
        // Checks for segmentGranularity
        if (config.getGranularitySpec().getSegmentGranularity() != null) {
            final Granularity existingSegmentGranularity = existingGranularitySpec != null ? existingGranularitySpec.getSegmentGranularity() : null;
            if (existingSegmentGranularity == null) {
                // Candidate segments were all compacted without segment granularity set.
                // We need to check if all segments have the same segment granularity as the configured segment granularity.
                boolean needsCompaction = candidates.segments
                    .stream()
                    .anyMatch(segment -> !config.getGranularitySpec().getSegmentGranularity().isAligned(segment.getInterval()));
                if (needsCompaction) {
                    log.info(
                        "Segments were previously compacted without a segmentGranularity set in auto compaction."
                            + " Configured segmentGranularity[%s] differs from the granularity implied by the segment intervals. Needs compaction.",
                        config.getGranularitySpec().getSegmentGranularity()
                    );
                    return true;
                }
            } else if (!config.getGranularitySpec().getSegmentGranularity().equals(existingSegmentGranularity)) {
                log.info("Configured segmentGranularity[%s] is different from the segmentGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getSegmentGranularity(), existingSegmentGranularity);
                return true;
            }
        }
        // Checks for rollup
        if (config.getGranularitySpec().isRollup() != null) {
            final Boolean existingRollup = existingGranularitySpec != null ? existingGranularitySpec.isRollup() : null;
            if (existingRollup == null || !config.getGranularitySpec().isRollup().equals(existingRollup)) {
                log.info("Configured rollup[%s] is different from the rollup[%s] of segments. Needs compaction", config.getGranularitySpec().isRollup(), existingRollup);
                return true;
            }
        }
        // Checks for queryGranularity
        if (config.getGranularitySpec().getQueryGranularity() != null) {
            final Granularity existingQueryGranularity = existingGranularitySpec != null ? existingGranularitySpec.getQueryGranularity() : null;
            if (!config.getGranularitySpec().getQueryGranularity().equals(existingQueryGranularity)) {
                log.info("Configured queryGranularity[%s] is different from the queryGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getQueryGranularity(), existingQueryGranularity);
                return true;
            }
        }
    }
    if (config.getDimensionsSpec() != null) {
        final DimensionsSpec existingDimensionsSpec = lastCompactionState.getDimensionsSpec();
        // Checks for list of dimensions
        if (config.getDimensionsSpec().getDimensions() != null) {
            final List<DimensionSchema> existingDimensions = existingDimensionsSpec != null ? existingDimensionsSpec.getDimensions() : null;
            if (!config.getDimensionsSpec().getDimensions().equals(existingDimensions)) {
                log.info("Configured dimensionsSpec is different from the dimensionsSpec of segments. Needs compaction");
                return true;
            }
        }
    }
    if (config.getTransformSpec() != null) {
        final ClientCompactionTaskTransformSpec existingTransformSpec =
            lastCompactionState.getTransformSpec() != null
                ? objectMapper.convertValue(lastCompactionState.getTransformSpec(), ClientCompactionTaskTransformSpec.class)
                : null;
        // Checks for filters
        if (config.getTransformSpec().getFilter() != null) {
            final DimFilter existingFilters = existingTransformSpec != null ? existingTransformSpec.getFilter() : null;
            if (!config.getTransformSpec().getFilter().equals(existingFilters)) {
                log.info("Configured filter[%s] is different from the filter[%s] of segments. Needs compaction", config.getTransformSpec().getFilter(), existingFilters);
                return true;
            }
        }
    }
    if (ArrayUtils.isNotEmpty(config.getMetricsSpec())) {
        final AggregatorFactory[] existingMetricsSpec =
            lastCompactionState.getMetricsSpec() == null || lastCompactionState.getMetricsSpec().isEmpty()
                ? null
                : objectMapper.convertValue(lastCompactionState.getMetricsSpec(), AggregatorFactory[].class);
        if (existingMetricsSpec == null || !Arrays.deepEquals(config.getMetricsSpec(), existingMetricsSpec)) {
            log.info("Configured metricsSpec[%s] is different from the metricsSpec[%s] of segments. Needs compaction", Arrays.toString(config.getMetricsSpec()), Arrays.toString(existingMetricsSpec));
            return true;
        }
    }
    return false;
}
Also used: IndexSpec (org.apache.druid.segment.IndexSpec), ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec), ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec), Granularity (org.apache.druid.java.util.common.granularity.Granularity), AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory), DimensionSchema (org.apache.druid.data.input.impl.DimensionSchema), ClientCompactionTaskQueryTuningConfig (org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig), DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec), PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), DimFilter (org.apache.druid.query.filter.DimFilter)
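
Most of the checks above follow one recurring pattern: CompactionState holds loosely typed maps, so each check converts the stored map back to its typed config class before comparing it with the configured value. A minimal sketch of that pattern for the indexSpec check, with hypothetical values and the same classpath assumption (CompactionStateCheckSketch is an illustrative name):

import java.util.Map;
import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.jackson.DefaultObjectMapper;
import org.apache.druid.segment.IndexSpec;

public class CompactionStateCheckSketch {
    public static void main(String[] args) {
        ObjectMapper mapper = new DefaultObjectMapper();
        IndexSpec configured = new IndexSpec(); // all-default index spec, as on the fallback path above
        // Pretend this map came from segment.getLastCompactionState().getIndexSpec().
        Map<?, ?> stored = mapper.convertValue(configured, Map.class);
        IndexSpec fromSegment = mapper.convertValue(stored, IndexSpec.class);
        System.out.println(configured.equals(fromSegment)); // true -> indexSpec needs no compaction
    }
}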

Aggregations

CompactionState (org.apache.druid.timeline.CompactionState): 27 uses
Map (java.util.Map): 25 uses
DataSegment (org.apache.druid.timeline.DataSegment): 25 uses
Test (org.junit.Test): 24 uses
ImmutableMap (com.google.common.collect.ImmutableMap): 20 uses
IndexSpec (org.apache.druid.segment.IndexSpec): 16 uses
ArrayList (java.util.ArrayList): 15 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 15 uses
PartitionsSpec (org.apache.druid.indexer.partitions.PartitionsSpec): 13 uses
Period (org.joda.time.Period): 13 uses
HashMap (java.util.HashMap): 12 uses
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 12 uses
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 11 uses
UserCompactionTaskGranularityConfig (org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig): 9 uses
AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest): 8 uses
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 7 uses
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 6 uses
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 5 uses
ImmutableList (com.google.common.collect.ImmutableList): 4 uses
List (java.util.List): 4 uses