Search in sources :

Example 21 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskTest method testCreateCompactionTaskWithNullSegmentGranularityInGranularitySpecAndSegmentGranularityShouldSucceed.

@Test(expected = IAE.class)
public void testCreateCompactionTaskWithNullSegmentGranularityInGranularitySpecAndSegmentGranularityShouldSucceed() {
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    builder.inputSpec(new CompactionIntervalSpec(COMPACTION_INTERVAL, SegmentUtils.hashIds(SEGMENTS)));
    builder.tuningConfig(createTuningConfig());
    builder.segmentGranularity(Granularities.HOUR);
    builder.granularitySpec(new ClientCompactionTaskGranularitySpec(null, Granularities.DAY, null));
    try {
        builder.build();
    } catch (IAE iae) {
        Assert.assertEquals(StringUtils.format(CONFLICTING_SEGMENT_GRANULARITY_FORMAT, Granularities.HOUR, null), iae.getMessage());
        throw iae;
    }
    Assert.fail("Should not have reached here!");
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) IAE(org.apache.druid.java.util.common.IAE) Test(org.junit.Test)

Example 22 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskTest method testQueryGranularityAndNullSegmentGranularity.

@Test
public void testQueryGranularityAndNullSegmentGranularity() throws IOException, SegmentLoadingException {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), null, null, null, new ClientCompactionTaskGranularitySpec(null, new PeriodGranularity(Period.months(3), null, null), null), COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(6, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), SEGMENT_INTERVALS, Granularities.MONTH, new PeriodGranularity(Period.months(3), null, null), IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) Test(org.junit.Test)

Example 23 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskTest method testGranularitySpecWithNullQueryGranularityAndNullSegmentGranularity.

@Test
public void testGranularitySpecWithNullQueryGranularityAndNullSegmentGranularity() throws IOException, SegmentLoadingException {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), null, null, null, new ClientCompactionTaskGranularitySpec(null, null, null), COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = getExpectedDimensionsSpecForAutoGeneration();
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(6, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), SEGMENT_INTERVALS, Granularities.MONTH, Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) Test(org.junit.Test)

Example 24 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class NewestSegmentFirstIterator method needsCompaction.

private boolean needsCompaction(DataSourceCompactionConfig config, SegmentsToCompact candidates) {
    Preconditions.checkState(!candidates.isEmpty(), "Empty candidates");
    final ClientCompactionTaskQueryTuningConfig tuningConfig = ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment());
    final PartitionsSpec partitionsSpecFromConfig = findPartitionsSpecFromConfig(tuningConfig);
    final CompactionState lastCompactionState = candidates.segments.get(0).getLastCompactionState();
    if (lastCompactionState == null) {
        log.info("Candidate segment[%s] is not compacted yet. Needs compaction.", candidates.segments.get(0).getId());
        return true;
    }
    final boolean allCandidatesHaveSameLastCompactionState = candidates.segments.stream().allMatch(segment -> lastCompactionState.equals(segment.getLastCompactionState()));
    if (!allCandidatesHaveSameLastCompactionState) {
        log.info("[%s] Candidate segments were compacted with different partitions spec. Needs compaction.", candidates.segments.size());
        log.debugSegments(candidates.segments, "Candidate segments compacted with different partiton spec");
        return true;
    }
    final PartitionsSpec segmentPartitionsSpec = lastCompactionState.getPartitionsSpec();
    final IndexSpec segmentIndexSpec = objectMapper.convertValue(lastCompactionState.getIndexSpec(), IndexSpec.class);
    final IndexSpec configuredIndexSpec;
    if (tuningConfig.getIndexSpec() == null) {
        configuredIndexSpec = new IndexSpec();
    } else {
        configuredIndexSpec = tuningConfig.getIndexSpec();
    }
    if (!Objects.equals(partitionsSpecFromConfig, segmentPartitionsSpec)) {
        log.info("Configured partitionsSpec[%s] is differenet from " + "the partitionsSpec[%s] of segments. Needs compaction.", partitionsSpecFromConfig, segmentPartitionsSpec);
        return true;
    }
    // segmentIndexSpec cannot be null.
    if (!segmentIndexSpec.equals(configuredIndexSpec)) {
        log.info("Configured indexSpec[%s] is different from the one[%s] of segments. Needs compaction", configuredIndexSpec, segmentIndexSpec);
        return true;
    }
    if (config.getGranularitySpec() != null) {
        final ClientCompactionTaskGranularitySpec existingGranularitySpec = lastCompactionState.getGranularitySpec() != null ? objectMapper.convertValue(lastCompactionState.getGranularitySpec(), ClientCompactionTaskGranularitySpec.class) : null;
        // Checks for segmentGranularity
        if (config.getGranularitySpec().getSegmentGranularity() != null) {
            final Granularity existingSegmentGranularity = existingGranularitySpec != null ? existingGranularitySpec.getSegmentGranularity() : null;
            if (existingSegmentGranularity == null) {
                // Candidate segments were all compacted without segment granularity set.
                // We need to check if all segments have the same segment granularity as the configured segment granularity.
                boolean needsCompaction = candidates.segments.stream().anyMatch(segment -> !config.getGranularitySpec().getSegmentGranularity().isAligned(segment.getInterval()));
                if (needsCompaction) {
                    log.info("Segments were previously compacted but without segmentGranularity in auto compaction." + " Configured segmentGranularity[%s] is different from granularity implied by segment intervals. Needs compaction", config.getGranularitySpec().getSegmentGranularity());
                    return true;
                }
            } else if (!config.getGranularitySpec().getSegmentGranularity().equals(existingSegmentGranularity)) {
                log.info("Configured segmentGranularity[%s] is different from the segmentGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getSegmentGranularity(), existingSegmentGranularity);
                return true;
            }
        }
        // Checks for rollup
        if (config.getGranularitySpec().isRollup() != null) {
            final Boolean existingRollup = existingGranularitySpec != null ? existingGranularitySpec.isRollup() : null;
            if (existingRollup == null || !config.getGranularitySpec().isRollup().equals(existingRollup)) {
                log.info("Configured rollup[%s] is different from the rollup[%s] of segments. Needs compaction", config.getGranularitySpec().isRollup(), existingRollup);
                return true;
            }
        }
        // Checks for queryGranularity
        if (config.getGranularitySpec().getQueryGranularity() != null) {
            final Granularity existingQueryGranularity = existingGranularitySpec != null ? existingGranularitySpec.getQueryGranularity() : null;
            if (!config.getGranularitySpec().getQueryGranularity().equals(existingQueryGranularity)) {
                log.info("Configured queryGranularity[%s] is different from the queryGranularity[%s] of segments. Needs compaction", config.getGranularitySpec().getQueryGranularity(), existingQueryGranularity);
                return true;
            }
        }
    }
    if (config.getDimensionsSpec() != null) {
        final DimensionsSpec existingDimensionsSpec = lastCompactionState.getDimensionsSpec();
        // Checks for list of dimensions
        if (config.getDimensionsSpec().getDimensions() != null) {
            final List<DimensionSchema> existingDimensions = existingDimensionsSpec != null ? existingDimensionsSpec.getDimensions() : null;
            if (!config.getDimensionsSpec().getDimensions().equals(existingDimensions)) {
                log.info("Configured dimensionsSpec is different from the dimensionsSpec of segments. Needs compaction");
                return true;
            }
        }
    }
    if (config.getTransformSpec() != null) {
        final ClientCompactionTaskTransformSpec existingTransformSpec = lastCompactionState.getTransformSpec() != null ? objectMapper.convertValue(lastCompactionState.getTransformSpec(), ClientCompactionTaskTransformSpec.class) : null;
        // Checks for filters
        if (config.getTransformSpec().getFilter() != null) {
            final DimFilter existingFilters = existingTransformSpec != null ? existingTransformSpec.getFilter() : null;
            if (!config.getTransformSpec().getFilter().equals(existingFilters)) {
                log.info("Configured filter[%s] is different from the filter[%s] of segments. Needs compaction", config.getTransformSpec().getFilter(), existingFilters);
                return true;
            }
        }
    }
    if (ArrayUtils.isNotEmpty(config.getMetricsSpec())) {
        final AggregatorFactory[] existingMetricsSpec = lastCompactionState.getMetricsSpec() == null || lastCompactionState.getMetricsSpec().isEmpty() ? null : objectMapper.convertValue(lastCompactionState.getMetricsSpec(), AggregatorFactory[].class);
        if (existingMetricsSpec == null || !Arrays.deepEquals(config.getMetricsSpec(), existingMetricsSpec)) {
            log.info("Configured metricsSpec[%s] is different from the metricsSpec[%s] of segments. Needs compaction", Arrays.toString(config.getMetricsSpec()), Arrays.toString(existingMetricsSpec));
            return true;
        }
    }
    return false;
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) DimensionSchema(org.apache.druid.data.input.impl.DimensionSchema) ClientCompactionTaskQueryTuningConfig(org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) CompactionState(org.apache.druid.timeline.CompactionState) DimFilter(org.apache.druid.query.filter.DimFilter)

Example 25 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactSegments method doRun.

private CoordinatorStats doRun(Map<String, DataSourceCompactionConfig> compactionConfigs, Map<String, AutoCompactionSnapshot.Builder> currentRunAutoCompactionSnapshotBuilders, int numAvailableCompactionTaskSlots, CompactionSegmentIterator iterator) {
    int numSubmittedTasks = 0;
    int numCompactionTasksAndSubtasks = 0;
    while (iterator.hasNext() && numCompactionTasksAndSubtasks < numAvailableCompactionTaskSlots) {
        final List<DataSegment> segmentsToCompact = iterator.next();
        if (!segmentsToCompact.isEmpty()) {
            final String dataSourceName = segmentsToCompact.get(0).getDataSource();
            // As these segments will be compacted, we will aggregates the statistic to the Compacted statistics
            AutoCompactionSnapshot.Builder snapshotBuilder = currentRunAutoCompactionSnapshotBuilders.computeIfAbsent(dataSourceName, k -> new AutoCompactionSnapshot.Builder(k, AutoCompactionSnapshot.AutoCompactionScheduleStatus.RUNNING));
            snapshotBuilder.incrementBytesCompacted(segmentsToCompact.stream().mapToLong(DataSegment::getSize).sum());
            snapshotBuilder.incrementIntervalCountCompacted(segmentsToCompact.stream().map(DataSegment::getInterval).distinct().count());
            snapshotBuilder.incrementSegmentCountCompacted(segmentsToCompact.size());
            final DataSourceCompactionConfig config = compactionConfigs.get(dataSourceName);
            // Create granularitySpec to send to compaction task
            ClientCompactionTaskGranularitySpec granularitySpec;
            Granularity segmentGranularityToUse = null;
            if (config.getGranularitySpec() == null || config.getGranularitySpec().getSegmentGranularity() == null) {
                // Determines segmentGranularity from the segmentsToCompact
                // Each batch of segmentToCompact from CompactionSegmentIterator will contains the same interval as
                // segmentGranularity is not set in the compaction config
                Interval interval = segmentsToCompact.get(0).getInterval();
                if (segmentsToCompact.stream().allMatch(segment -> interval.overlaps(segment.getInterval()))) {
                    try {
                        segmentGranularityToUse = GranularityType.fromPeriod(interval.toPeriod()).getDefaultGranularity();
                    } catch (IAE iae) {
                        // This case can happen if the existing segment interval result in complicated periods.
                        // Fall back to setting segmentGranularity as null
                        LOG.warn("Cannot determine segmentGranularity from interval [%s]", interval);
                    }
                } else {
                    LOG.warn("segmentsToCompact does not have the same interval. Fallback to not setting segmentGranularity for auto compaction task");
                }
            } else {
                segmentGranularityToUse = config.getGranularitySpec().getSegmentGranularity();
            }
            granularitySpec = new ClientCompactionTaskGranularitySpec(segmentGranularityToUse, config.getGranularitySpec() != null ? config.getGranularitySpec().getQueryGranularity() : null, config.getGranularitySpec() != null ? config.getGranularitySpec().isRollup() : null);
            // Create dimensionsSpec to send to compaction task
            ClientCompactionTaskDimensionsSpec dimensionsSpec;
            if (config.getDimensionsSpec() != null) {
                dimensionsSpec = new ClientCompactionTaskDimensionsSpec(config.getDimensionsSpec().getDimensions());
            } else {
                dimensionsSpec = null;
            }
            // Create transformSpec to send to compaction task
            ClientCompactionTaskTransformSpec transformSpec = null;
            if (config.getTransformSpec() != null) {
                transformSpec = new ClientCompactionTaskTransformSpec(config.getTransformSpec().getFilter());
            }
            Boolean dropExisting = null;
            if (config.getIoConfig() != null) {
                dropExisting = config.getIoConfig().isDropExisting();
            }
            // make tuningConfig
            final String taskId = indexingServiceClient.compactSegments("coordinator-issued", segmentsToCompact, config.getTaskPriority(), ClientCompactionTaskQueryTuningConfig.from(config.getTuningConfig(), config.getMaxRowsPerSegment()), granularitySpec, dimensionsSpec, config.getMetricsSpec(), transformSpec, dropExisting, newAutoCompactionContext(config.getTaskContext()));
            LOG.info("Submitted a compactionTask[%s] for %s segments", taskId, segmentsToCompact.size());
            LOG.infoSegments(segmentsToCompact, "Compacting segments");
            // Count the compaction task itself + its sub tasks
            numSubmittedTasks++;
            numCompactionTasksAndSubtasks += findMaxNumTaskSlotsUsedByOneCompactionTask(config.getTuningConfig());
        } else {
            throw new ISE("segmentsToCompact is empty?");
        }
    }
    return makeStats(currentRunAutoCompactionSnapshotBuilders, numSubmittedTasks, iterator);
}
Also used : ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) Granularity(org.apache.druid.java.util.common.granularity.Granularity) IAE(org.apache.druid.java.util.common.IAE) DataSegment(org.apache.druid.timeline.DataSegment) AutoCompactionSnapshot(org.apache.druid.server.coordinator.AutoCompactionSnapshot) DataSourceCompactionConfig(org.apache.druid.server.coordinator.DataSourceCompactionConfig) ClientCompactionTaskDimensionsSpec(org.apache.druid.client.indexing.ClientCompactionTaskDimensionsSpec) ISE(org.apache.druid.java.util.common.ISE) Interval(org.joda.time.Interval)

Aggregations

ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec)30 Test (org.junit.Test)26 ImmutableList (com.google.common.collect.ImmutableList)13 ArrayList (java.util.ArrayList)13 List (java.util.List)13 Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder)12 DataSegment (org.apache.druid.timeline.DataSegment)12 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)8 ParallelIndexIngestionSpec (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec)7 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)7 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)7 ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec)6 HttpIndexingServiceClient (org.apache.druid.client.indexing.HttpIndexingServiceClient)6 PartitionConfigurationManager (org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager)6 SegmentProvider (org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider)6 DataSourceCompactionConfig (org.apache.druid.server.coordinator.DataSourceCompactionConfig)6 TaskStatus (org.apache.druid.indexer.TaskStatus)5 DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec)5 IndexSpec (org.apache.druid.segment.IndexSpec)5 UserCompactionTaskQueryTuningConfig (org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig)5