Example 1 with ClientCompactionTaskGranularitySpec

Use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

From class CompactSegmentsTest, method testCompactWithRollupInGranularitySpec:

@Test
public void testCompactWithRollupInGranularitySpec() {
    final HttpIndexingServiceClient mockIndexingServiceClient = Mockito.mock(HttpIndexingServiceClient.class);
    final CompactSegments compactSegments = new CompactSegments(COORDINATOR_CONFIG, JSON_MAPPER, mockIndexingServiceClient);
    final List<DataSourceCompactionConfig> compactionConfigs = new ArrayList<>();
    final String dataSource = DATA_SOURCE_PREFIX + 0;
    compactionConfigs.add(new DataSourceCompactionConfig(
        dataSource,
        0,
        500L,
        null,
        new Period("PT0H"), // smaller than segment interval
        new UserCompactionTaskQueryTuningConfig(null, null, null, null, partitionsSpec, null, null, null, null, null, 3, null, null, null, null, null, null),
        new UserCompactionTaskGranularityConfig(Granularities.YEAR, null, true),
        null, null, null, null, null
    ));
    doCompactSegments(compactSegments, compactionConfigs);
    ArgumentCaptor<List<DataSegment>> segmentsCaptor = ArgumentCaptor.forClass(List.class);
    ArgumentCaptor<ClientCompactionTaskGranularitySpec> granularitySpecArgumentCaptor = ArgumentCaptor.forClass(ClientCompactionTaskGranularitySpec.class);
    Mockito.verify(mockIndexingServiceClient).compactSegments(
        ArgumentMatchers.anyString(), segmentsCaptor.capture(), ArgumentMatchers.anyInt(), ArgumentMatchers.any(),
        granularitySpecArgumentCaptor.capture(), ArgumentMatchers.any(), ArgumentMatchers.any(),
        ArgumentMatchers.any(), ArgumentMatchers.any(), ArgumentMatchers.any()
    );
    Assert.assertEquals(datasourceToSegments.get(dataSource).size(), segmentsCaptor.getValue().size());
    ClientCompactionTaskGranularitySpec actual = granularitySpecArgumentCaptor.getValue();
    Assert.assertNotNull(actual);
    ClientCompactionTaskGranularitySpec expected = new ClientCompactionTaskGranularitySpec(Granularities.YEAR, null, true);
    Assert.assertEquals(expected, actual);
}
Also used: ArrayList(java.util.ArrayList) Period(org.joda.time.Period) UserCompactionTaskQueryTuningConfig(org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) HttpIndexingServiceClient(org.apache.druid.client.indexing.HttpIndexingServiceClient) DataSourceCompactionConfig(org.apache.druid.server.coordinator.DataSourceCompactionConfig) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) Test(org.junit.Test)
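
The final assertion in this test compares two separately constructed instances, so it relies on ClientCompactionTaskGranularitySpec implementing value-based equality over its (segmentGranularity, queryGranularity, rollup) triple. A minimal standalone sketch of that behavior (the demo class name is ours for illustration):

import org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec;
import org.apache.druid.java.util.common.granularity.Granularities;

public class GranularitySpecEqualityDemo {
    public static void main(String[] args) {
        // Same constructor arguments as the expected/actual pair in the test above.
        ClientCompactionTaskGranularitySpec a = new ClientCompactionTaskGranularitySpec(Granularities.YEAR, null, true);
        ClientCompactionTaskGranularitySpec b = new ClientCompactionTaskGranularitySpec(Granularities.YEAR, null, true);
        // Prints true: equality is value-based, which is what Assert.assertEquals exercises.
        System.out.println(a.equals(b));
    }
}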

Example 2 with ClientCompactionTaskGranularitySpec

Use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

From class CompactionTask, method createIngestionSchema:

/**
 * Generate {@link ParallelIndexIngestionSpec} from input segments.
 *
 * @return an empty list if no input segments exist; otherwise, the generated ingestion specs,
 *         one per compacted interval.
 */
@VisibleForTesting
static List<ParallelIndexIngestionSpec> createIngestionSchema(
    final TaskToolbox toolbox,
    final LockGranularity lockGranularityInUse,
    final SegmentProvider segmentProvider,
    final PartitionConfigurationManager partitionConfigurationManager,
    @Nullable final DimensionsSpec dimensionsSpec,
    @Nullable final ClientCompactionTaskTransformSpec transformSpec,
    @Nullable final AggregatorFactory[] metricsSpec,
    @Nullable final ClientCompactionTaskGranularitySpec granularitySpec,
    final CoordinatorClient coordinatorClient,
    final SegmentCacheManagerFactory segmentCacheManagerFactory,
    final RetryPolicyFactory retryPolicyFactory,
    final boolean dropExisting
) throws IOException, SegmentLoadingException {
    NonnullPair<Map<DataSegment, File>, List<TimelineObjectHolder<String, DataSegment>>> pair = prepareSegments(toolbox, segmentProvider, lockGranularityInUse);
    final Map<DataSegment, File> segmentFileMap = pair.lhs;
    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = pair.rhs;
    if (timelineSegments.isEmpty()) {
        return Collections.emptyList();
    }
    // find metadata for interval
    // queryableIndexAndSegments is sorted by the interval of the dataSegment
    final List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments = loadSegments(timelineSegments, segmentFileMap, toolbox.getIndexIO());
    final CompactionTuningConfig compactionTuningConfig = partitionConfigurationManager.computeTuningConfig();
    if (granularitySpec == null || granularitySpec.getSegmentGranularity() == null) {
        // original granularity
        final Map<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> intervalToSegments = new TreeMap<>(Comparators.intervalsByStartThenEnd());
        queryableIndexAndSegments.forEach(p -> intervalToSegments.computeIfAbsent(p.rhs.getInterval(), k -> new ArrayList<>()).add(p));
        // unify overlapping intervals so that overlapping segments are compacted in the same
        // ingestion spec (a standalone sketch of this merge appears after this example)
        List<NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>>> intervalToSegmentsUnified = new ArrayList<>();
        Interval union = null;
        List<NonnullPair<QueryableIndex, DataSegment>> segments = new ArrayList<>();
        for (Entry<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegments.entrySet()) {
            Interval cur = entry.getKey();
            if (union == null) {
                union = cur;
                segments.addAll(entry.getValue());
            } else if (union.overlaps(cur)) {
                union = Intervals.utc(union.getStartMillis(), Math.max(union.getEndMillis(), cur.getEndMillis()));
                segments.addAll(entry.getValue());
            } else {
                intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
                union = cur;
                segments = new ArrayList<>(entry.getValue());
            }
        }
        intervalToSegmentsUnified.add(new NonnullPair<>(union, segments));
        final List<ParallelIndexIngestionSpec> specs = new ArrayList<>(intervalToSegmentsUnified.size());
        for (NonnullPair<Interval, List<NonnullPair<QueryableIndex, DataSegment>>> entry : intervalToSegmentsUnified) {
            final Interval interval = entry.lhs;
            final List<NonnullPair<QueryableIndex, DataSegment>> segmentsToCompact = entry.rhs;
            // If granularitySpec is null, create a new spec carrying the computed segmentGranularity;
            // otherwise keep the given spec and only override its segmentGranularity.
            Granularity segmentGranularityToUse = GranularityType.fromPeriod(interval.toPeriod()).getDefaultGranularity();
            final DataSchema dataSchema = createDataSchema(
                segmentProvider.dataSource,
                segmentsToCompact,
                dimensionsSpec,
                transformSpec,
                metricsSpec,
                granularitySpec == null
                    ? new ClientCompactionTaskGranularitySpec(segmentGranularityToUse, null, null)
                    : granularitySpec.withSegmentGranularity(segmentGranularityToUse)
            );
            specs.add(new ParallelIndexIngestionSpec(
                dataSchema,
                createIoConfig(toolbox, dataSchema, interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting),
                compactionTuningConfig
            ));
        }
        return specs;
    } else {
        // given segment granularity
        final DataSchema dataSchema = createDataSchema(
            segmentProvider.dataSource,
            queryableIndexAndSegments,
            dimensionsSpec,
            transformSpec,
            metricsSpec,
            granularitySpec
        );
        return Collections.singletonList(new ParallelIndexIngestionSpec(
            dataSchema,
            createIoConfig(toolbox, dataSchema, segmentProvider.interval, coordinatorClient, segmentCacheManagerFactory, retryPolicyFactory, dropExisting),
            compactionTuningConfig
        ));
    }
}
Also used: ArrayList(java.util.ArrayList) LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) DataSegment(org.apache.druid.timeline.DataSegment) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NonnullPair(org.apache.druid.java.util.common.NonnullPair) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) TreeMap(java.util.TreeMap) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DataSchema(org.apache.druid.segment.indexing.DataSchema) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) QueryableIndex(org.apache.druid.segment.QueryableIndex) Map(java.util.Map) BiMap(com.google.common.collect.BiMap) HashMap(java.util.HashMap) HashBiMap(com.google.common.collect.HashBiMap) File(java.io.File) Interval(org.joda.time.Interval) VisibleForTesting(com.google.common.annotations.VisibleForTesting)
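
The interval-unification loop is the subtle part of this method. Below is a standalone sketch of just that merge, using plain joda-time intervals instead of Druid's (QueryableIndex, DataSegment) pairs. The class and method names are ours for illustration, and the input is assumed pre-sorted by start then end, as the TreeMap with Comparators.intervalsByStartThenEnd() guarantees in the original:

import java.util.ArrayList;
import java.util.List;
import org.joda.time.DateTimeZone;
import org.joda.time.Interval;

public class IntervalUnionSketch {
    // Merges overlapping intervals in a start-sorted list into their unions.
    static List<Interval> unify(List<Interval> sorted) {
        List<Interval> unified = new ArrayList<>();
        Interval union = null;
        for (Interval cur : sorted) {
            if (union == null) {
                union = cur;
            } else if (union.overlaps(cur)) {
                // extend the running union to cover both intervals
                union = new Interval(
                    union.getStartMillis(),
                    Math.max(union.getEndMillis(), cur.getEndMillis()),
                    DateTimeZone.UTC
                );
            } else {
                // gap found: flush the finished union and start a new one
                unified.add(union);
                union = cur;
            }
        }
        if (union != null) {
            unified.add(union);
        }
        return unified;
    }
}

Note that joda-time's overlaps() treats abutting intervals as non-overlapping, matching the original code: abutting segments stay in separate ingestion specs.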

Example 3 with ClientCompactionTaskGranularitySpec

Use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

From class CompactionTask, method createDataSchema:

private static DataSchema createDataSchema(
    String dataSource,
    List<NonnullPair<QueryableIndex, DataSegment>> queryableIndexAndSegments,
    @Nullable DimensionsSpec dimensionsSpec,
    @Nullable ClientCompactionTaskTransformSpec transformSpec,
    @Nullable AggregatorFactory[] metricsSpec,
    @Nonnull ClientCompactionTaskGranularitySpec granularitySpec
) {
    // Check the index metadata and decide which rollup and queryGranularity values
    // to propagate (i.e. carry over) from the existing segments.
    final SettableSupplier<Boolean> rollup = new SettableSupplier<>();
    final SettableSupplier<Granularity> queryGranularity = new SettableSupplier<>();
    decideRollupAndQueryGranularityCarryOver(rollup, queryGranularity, queryableIndexAndSegments);
    final Interval totalInterval = JodaUtils.umbrellaInterval(queryableIndexAndSegments.stream().map(p -> p.rhs.getInterval()).collect(Collectors.toList()));
    final Granularity queryGranularityToUse;
    if (granularitySpec.getQueryGranularity() == null) {
        queryGranularityToUse = queryGranularity.get();
        log.info("Generate compaction task spec with segments original query granularity [%s]", queryGranularityToUse);
    } else {
        queryGranularityToUse = granularitySpec.getQueryGranularity();
        log.info("Generate compaction task spec with new query granularity overrided from input [%s]", queryGranularityToUse);
    }
    final GranularitySpec uniformGranularitySpec = new UniformGranularitySpec(
        Preconditions.checkNotNull(granularitySpec.getSegmentGranularity()),
        queryGranularityToUse,
        granularitySpec.isRollup() == null ? rollup.get() : granularitySpec.isRollup(),
        Collections.singletonList(totalInterval)
    );
    // find unique dimensions
    final DimensionsSpec finalDimensionsSpec = dimensionsSpec == null ? createDimensionsSpec(queryableIndexAndSegments) : dimensionsSpec;
    final AggregatorFactory[] finalMetricsSpec = metricsSpec == null ? createMetricsSpec(queryableIndexAndSegments) : metricsSpec;
    return new DataSchema(
        dataSource,
        new TimestampSpec(ColumnHolder.TIME_COLUMN_NAME, "millis", null),
        finalDimensionsSpec,
        finalMetricsSpec,
        uniformGranularitySpec,
        transformSpec == null ? null : new TransformSpec(transformSpec.getFilter(), null)
    );
}
Also used: LockGranularity(org.apache.druid.indexing.common.LockGranularity) Granularity(org.apache.druid.java.util.common.granularity.Granularity) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) TransformSpec(org.apache.druid.segment.transform.TransformSpec) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) DataSchema(org.apache.druid.segment.indexing.DataSchema) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) GranularitySpec(org.apache.druid.segment.indexing.granularity.GranularitySpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Interval(org.joda.time.Interval)
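
The two ternaries in this method follow the same fallback rule: an explicit value on the request's granularity spec wins; otherwise the value carried over from the existing segments is kept. A minimal sketch of just that rule (the class, method, and parameter names are ours for illustration):

import org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec;
import org.apache.druid.java.util.common.granularity.Granularity;

final class GranularityFallbacks {
    static Granularity resolveQueryGranularity(
        ClientCompactionTaskGranularitySpec spec,
        Granularity carriedOverFromSegments
    ) {
        return spec.getQueryGranularity() == null
            ? carriedOverFromSegments     // keep what the segments already had
            : spec.getQueryGranularity(); // explicit override from the request
    }

    static boolean resolveRollup(ClientCompactionTaskGranularitySpec spec, boolean carriedOver) {
        return spec.isRollup() == null ? carriedOver : spec.isRollup();
    }
}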

Example 4 with ClientCompactionTaskGranularitySpec

Use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

From class CompactionTaskTest, method testCreateCompactionTaskWithSameGranularitySpecAndSegmentGranularityShouldSucceed:

@Test
public void testCreateCompactionTaskWithSameGranularitySpecAndSegmentGranularityShouldSucceed() {
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    builder.inputSpec(new CompactionIntervalSpec(COMPACTION_INTERVAL, SegmentUtils.hashIds(SEGMENTS)));
    builder.tuningConfig(createTuningConfig());
    builder.segmentGranularity(Granularities.HOUR);
    builder.granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.HOUR, Granularities.DAY, null));
    final CompactionTask taskCreatedWithSegmentGranularity = builder.build();
    Assert.assertEquals(Granularities.HOUR, taskCreatedWithSegmentGranularity.getSegmentGranularity());
}
Also used: Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) Test(org.junit.Test)
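
Example 4 passes because the builder-level segmentGranularity (HOUR) and the granularitySpec's segmentGranularity (HOUR) agree. Presumably the build would be rejected if they disagreed; here is a hedged sketch of that contrasting case, assuming CompactionTask validates the conflict with an IllegalArgumentException (this page does not show that validation, so treat it as an assumption):

@Test
public void testConflictingGranularitySketch() {
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    builder.inputSpec(new CompactionIntervalSpec(COMPACTION_INTERVAL, SegmentUtils.hashIds(SEGMENTS)));
    builder.tuningConfig(createTuningConfig());
    builder.segmentGranularity(Granularities.HOUR); // builder-level HOUR ...
    builder.granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null)); // ... vs. DAY here
    try {
        builder.build();
        Assert.fail("expected the conflicting segment granularities to be rejected (our assumption)");
    } catch (IllegalArgumentException e) {
        // expected under our assumption that CompactionTask validates the conflict
    }
}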

Example 5 with ClientCompactionTaskGranularitySpec

Use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

From class CompactionTaskRunTest, method testCompactionWithFilterInTransformSpec:

@Test
public void testCompactionWithFilterInTransformSpec() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    // DAY segmentGranularity
    final CompactionTask compactionTask = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
        .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null)))
        .build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(1, segments.size());
    Assert.assertEquals(Intervals.of("2014-01-01/2014-01-02"), segments.get(0).getInterval());
    Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec());
    ObjectMapper mapper = new DefaultObjectMapper();
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    CompactionState expectedCompactionState = new CompactionState(
        new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedLongSumMetric),
        getObjectMapper().readValue(getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()), Map.class),
        mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
        mapper.readValue(
            mapper.writeValueAsString(new UniformGranularitySpec(
                Granularities.DAY,
                Granularities.MINUTE,
                true,
                ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00"))
            )),
            Map.class
        )
    );
    Assert.assertEquals(expectedCompactionState, segments.get(0).getLastCompactionState());
}
Also used: IndexSpec(org.apache.druid.segment.IndexSpec) HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) CompactionState(org.apache.druid.timeline.CompactionState) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Map(java.util.Map) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
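
Example 5's expected-state construction uses a comparison trick worth isolating: each strongly typed spec is serialized with Jackson and read back as a raw Map, so it can be compared structurally against the loosely typed payload stored in the segment's last compaction state. A minimal sketch of that round trip, reusing only calls that appear in the example above:

ObjectMapper mapper = new DefaultObjectMapper();
UniformGranularitySpec spec = new UniformGranularitySpec(
    Granularities.DAY, Granularities.MINUTE, true,
    ImmutableList.of(Intervals.of("2014-01-01/2014-01-02"))
);
// Serialize, then deserialize as a raw Map: the Map form matches what the
// CompactionState built in the assertion above holds for each spec.
Map<String, Object> asMap = mapper.readValue(mapper.writeValueAsString(spec), Map.class);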

Aggregations

ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec): 30
Test (org.junit.Test): 26
ImmutableList (com.google.common.collect.ImmutableList): 13
ArrayList (java.util.ArrayList): 13
List (java.util.List): 13
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 12
DataSegment (org.apache.druid.timeline.DataSegment): 12
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 8
ParallelIndexIngestionSpec (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec): 7
AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory): 7
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 7
ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec): 6
HttpIndexingServiceClient (org.apache.druid.client.indexing.HttpIndexingServiceClient): 6
PartitionConfigurationManager (org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager): 6
SegmentProvider (org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider): 6
DataSourceCompactionConfig (org.apache.druid.server.coordinator.DataSourceCompactionConfig): 6
TaskStatus (org.apache.druid.indexer.TaskStatus): 5
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 5
IndexSpec (org.apache.druid.segment.IndexSpec): 5
UserCompactionTaskQueryTuningConfig (org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig): 5