Search in sources :

Example 16 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskRunTest method testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity.

@Test
public void testWithGranularitySpecNullQueryGranularityAndNullSegmentGranularity() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask1 = builder.interval(Intervals.of("2014-01-01/2014-01-02")).granularitySpec(new ClientCompactionTaskGranularitySpec(null, null, null)).build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask1);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(3, segments.size());
    for (int i = 0; i < 3; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1), segments.get(i).getInterval());
        Assert.assertEquals(getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1))), segments.get(i).getLastCompactionState());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
        }
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)

Example 17 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class ClientCompactionTaskQuerySerdeTest method testClientCompactionTaskQueryToCompactionTask.

@Test
public void testClientCompactionTaskQueryToCompactionTask() throws IOException {
    final ObjectMapper mapper = setupInjectablesInObjectMapper(new DefaultObjectMapper());
    final ClientCompactionTaskQuery query = new ClientCompactionTaskQuery("id", "datasource", new ClientCompactionIOConfig(new ClientCompactionIntervalSpec(Intervals.of("2019/2020"), "testSha256OfSortedSegmentIds"), true), new ClientCompactionTaskQueryTuningConfig(null, 40000, 2000L, null, new SegmentsSplitHintSpec(new HumanReadableBytes(100000L), 10), new DynamicPartitionsSpec(100, 30000L), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.LZF, LongEncodingStrategy.LONGS), new IndexSpec(new DefaultBitmapSerdeFactory(), CompressionStrategy.LZ4, CompressionStrategy.UNCOMPRESSED, LongEncodingStrategy.AUTO), 2, 1000L, TmpFileSegmentWriteOutMediumFactory.instance(), 100, 5, 1000L, new Duration(3000L), 7, 1000, 100), new ClientCompactionTaskGranularitySpec(Granularities.DAY, Granularities.HOUR, true), new ClientCompactionTaskDimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))), new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim1", "foo", null)), ImmutableMap.of("key", "value"));
    final byte[] json = mapper.writeValueAsBytes(query);
    final CompactionTask task = (CompactionTask) mapper.readValue(json, Task.class);
    Assert.assertEquals(query.getId(), task.getId());
    Assert.assertEquals(query.getDataSource(), task.getDataSource());
    Assert.assertTrue(task.getIoConfig().getInputSpec() instanceof CompactionIntervalSpec);
    Assert.assertEquals(query.getIoConfig().getInputSpec().getInterval(), ((CompactionIntervalSpec) task.getIoConfig().getInputSpec()).getInterval());
    Assert.assertEquals(query.getIoConfig().getInputSpec().getSha256OfSortedSegmentIds(), ((CompactionIntervalSpec) task.getIoConfig().getInputSpec()).getSha256OfSortedSegmentIds());
    Assert.assertEquals(query.getTuningConfig().getMaxRowsInMemory().intValue(), task.getTuningConfig().getMaxRowsInMemory());
    Assert.assertEquals(query.getTuningConfig().getMaxBytesInMemory().longValue(), task.getTuningConfig().getMaxBytesInMemory());
    Assert.assertEquals(query.getTuningConfig().getSplitHintSpec(), task.getTuningConfig().getSplitHintSpec());
    Assert.assertEquals(query.getTuningConfig().getPartitionsSpec(), task.getTuningConfig().getPartitionsSpec());
    Assert.assertEquals(query.getTuningConfig().getIndexSpec(), task.getTuningConfig().getIndexSpec());
    Assert.assertEquals(query.getTuningConfig().getIndexSpecForIntermediatePersists(), task.getTuningConfig().getIndexSpecForIntermediatePersists());
    Assert.assertEquals(query.getTuningConfig().getPushTimeout().longValue(), task.getTuningConfig().getPushTimeout());
    Assert.assertEquals(query.getTuningConfig().getSegmentWriteOutMediumFactory(), task.getTuningConfig().getSegmentWriteOutMediumFactory());
    Assert.assertEquals(query.getTuningConfig().getMaxNumConcurrentSubTasks().intValue(), task.getTuningConfig().getMaxNumConcurrentSubTasks());
    Assert.assertEquals(query.getTuningConfig().getMaxRetry().intValue(), task.getTuningConfig().getMaxRetry());
    Assert.assertEquals(query.getTuningConfig().getTaskStatusCheckPeriodMs().longValue(), task.getTuningConfig().getTaskStatusCheckPeriodMs());
    Assert.assertEquals(query.getTuningConfig().getChatHandlerTimeout(), task.getTuningConfig().getChatHandlerTimeout());
    Assert.assertEquals(query.getTuningConfig().getMaxNumSegmentsToMerge().intValue(), task.getTuningConfig().getMaxNumSegmentsToMerge());
    Assert.assertEquals(query.getTuningConfig().getTotalNumMergeTasks().intValue(), task.getTuningConfig().getTotalNumMergeTasks());
    Assert.assertEquals(query.getGranularitySpec(), task.getGranularitySpec());
    Assert.assertEquals(query.getGranularitySpec().getQueryGranularity(), task.getGranularitySpec().getQueryGranularity());
    Assert.assertEquals(query.getGranularitySpec().getSegmentGranularity(), task.getGranularitySpec().getSegmentGranularity());
    Assert.assertEquals(query.getGranularitySpec().isRollup(), task.getGranularitySpec().isRollup());
    Assert.assertEquals(query.getIoConfig().isDropExisting(), task.getIoConfig().isDropExisting());
    Assert.assertEquals(query.getContext(), task.getContext());
    Assert.assertEquals(query.getDimensionsSpec().getDimensions(), task.getDimensionsSpec().getDimensions());
    Assert.assertEquals(query.getTransformSpec().getFilter(), task.getTransformSpec().getFilter());
    Assert.assertArrayEquals(query.getMetricsSpec(), task.getMetricsSpec());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) ClientCompactionIOConfig(org.apache.druid.client.indexing.ClientCompactionIOConfig) ClientCompactionIntervalSpec(org.apache.druid.client.indexing.ClientCompactionIntervalSpec) Duration(org.joda.time.Duration) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) ClientCompactionTaskQueryTuningConfig(org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig) ClientCompactionIntervalSpec(org.apache.druid.client.indexing.ClientCompactionIntervalSpec) SegmentsSplitHintSpec(org.apache.druid.data.input.SegmentsSplitHintSpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) DefaultBitmapSerdeFactory(org.apache.druid.segment.data.BitmapSerde.DefaultBitmapSerdeFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) ClientCompactionTaskQuery(org.apache.druid.client.indexing.ClientCompactionTaskQuery) ClientCompactionTaskDimensionsSpec(org.apache.druid.client.indexing.ClientCompactionTaskDimensionsSpec) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) HumanReadableBytes(org.apache.druid.java.util.common.HumanReadableBytes) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Test(org.junit.Test)

Example 18 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskParallelRunTest method testCompactionDoesNotDropSegmentsIfDropFlagNotSet.

@Test
public void testCompactionDoesNotDropSegmentsIfDropFlagNotSet() {
    runIndexTask(null, true);
    Collection<DataSegment> usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    Assert.assertEquals(3, usedSegments.size());
    for (DataSegment segment : usedSegments) {
        Assert.assertTrue(Granularities.HOUR.isAligned(segment.getInterval()));
    }
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder.inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null)).tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING).granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.MINUTE, null, null)).build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    // All the HOUR segments did not get dropped since MINUTES segments did not fully covering the 3 HOURS interval.
    Assert.assertEquals(6, usedSegments.size());
    int hourSegmentCount = 0;
    int minuteSegmentCount = 0;
    for (DataSegment segment : usedSegments) {
        if (Granularities.MINUTE.isAligned(segment.getInterval())) {
            minuteSegmentCount++;
        }
        if (Granularities.MINUTE.isAligned(segment.getInterval())) {
            hourSegmentCount++;
        }
    }
    Assert.assertEquals(3, hourSegmentCount);
    Assert.assertEquals(3, minuteSegmentCount);
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Example 19 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskParallelRunTest method testCompactionDropSegmentsOfInputIntervalIfDropFlagIsSet.

@Test
public void testCompactionDropSegmentsOfInputIntervalIfDropFlagIsSet() {
    runIndexTask(null, true);
    Collection<DataSegment> usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    Assert.assertEquals(3, usedSegments.size());
    for (DataSegment segment : usedSegments) {
        Assert.assertTrue(Granularities.HOUR.isAligned(segment.getInterval()));
    }
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder.inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null), true).tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING).granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.MINUTE, null, null)).build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    usedSegments = getCoordinatorClient().fetchUsedSegmentsInDataSourceForIntervals(DATA_SOURCE, ImmutableList.of(INTERVAL_TO_INDEX));
    // All the HOUR segments got dropped even if we do not have all MINUTES segments fully covering the 3 HOURS interval.
    // In fact, we only have 3 minutes of data out of the 3 hours interval.
    Assert.assertEquals(3, usedSegments.size());
    for (DataSegment segment : usedSegments) {
        Assert.assertTrue(Granularities.MINUTE.isAligned(segment.getInterval()));
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DataSegment(org.apache.druid.timeline.DataSegment) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Example 20 with ClientCompactionTaskGranularitySpec

use of org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec in project druid by druid-io.

the class CompactionTaskTest method testSegmentGranularityAndNullQueryGranularity.

@Test
public void testSegmentGranularityAndNullQueryGranularity() throws IOException, SegmentLoadingException {
    final List<ParallelIndexIngestionSpec> ingestionSpecs = CompactionTask.createIngestionSchema(toolbox, LockGranularity.TIME_CHUNK, new SegmentProvider(DATA_SOURCE, new CompactionIntervalSpec(COMPACTION_INTERVAL, null)), new PartitionConfigurationManager(TUNING_CONFIG), null, null, null, new ClientCompactionTaskGranularitySpec(new PeriodGranularity(Period.months(3), null, null), null, null), COORDINATOR_CLIENT, segmentCacheManagerFactory, RETRY_POLICY_FACTORY, IOConfig.DEFAULT_DROP_EXISTING);
    final List<DimensionsSpec> expectedDimensionsSpec = ImmutableList.of(new DimensionsSpec(getDimensionSchema(new DoubleDimensionSchema("string_to_double"))));
    ingestionSpecs.sort((s1, s2) -> Comparators.intervalsByStartThenEnd().compare(s1.getDataSchema().getGranularitySpec().inputIntervals().get(0), s2.getDataSchema().getGranularitySpec().inputIntervals().get(0)));
    Assert.assertEquals(1, ingestionSpecs.size());
    assertIngestionSchema(ingestionSpecs, expectedDimensionsSpec, AGGREGATORS.stream().map(AggregatorFactory::getCombiningFactory).collect(Collectors.toList()), Collections.singletonList(COMPACTION_INTERVAL), new PeriodGranularity(Period.months(3), null, null), Granularities.NONE, IOConfig.DEFAULT_DROP_EXISTING);
}
Also used : DoubleDimensionSchema(org.apache.druid.data.input.impl.DoubleDimensionSchema) PartitionConfigurationManager(org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) ParallelIndexIngestionSpec(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) SegmentProvider(org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) DoubleLastAggregatorFactory(org.apache.druid.query.aggregation.last.DoubleLastAggregatorFactory) FloatMinAggregatorFactory(org.apache.druid.query.aggregation.FloatMinAggregatorFactory) FloatFirstAggregatorFactory(org.apache.druid.query.aggregation.first.FloatFirstAggregatorFactory) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) DoubleMaxAggregatorFactory(org.apache.druid.query.aggregation.DoubleMaxAggregatorFactory) LongMaxAggregatorFactory(org.apache.druid.query.aggregation.LongMaxAggregatorFactory) Test(org.junit.Test)

Aggregations

ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec)30 Test (org.junit.Test)26 ImmutableList (com.google.common.collect.ImmutableList)13 ArrayList (java.util.ArrayList)13 List (java.util.List)13 Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder)12 DataSegment (org.apache.druid.timeline.DataSegment)12 DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec)8 ParallelIndexIngestionSpec (org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexIngestionSpec)7 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)7 CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory)7 ClientCompactionTaskTransformSpec (org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec)6 HttpIndexingServiceClient (org.apache.druid.client.indexing.HttpIndexingServiceClient)6 PartitionConfigurationManager (org.apache.druid.indexing.common.task.CompactionTask.PartitionConfigurationManager)6 SegmentProvider (org.apache.druid.indexing.common.task.CompactionTask.SegmentProvider)6 DataSourceCompactionConfig (org.apache.druid.server.coordinator.DataSourceCompactionConfig)6 TaskStatus (org.apache.druid.indexer.TaskStatus)5 DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec)5 IndexSpec (org.apache.druid.segment.IndexSpec)5 UserCompactionTaskQueryTuningConfig (org.apache.druid.server.coordinator.UserCompactionTaskQueryTuningConfig)5