Example 1 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in the druid project (druid-io).

From the class CompactionTaskParallelRunTest, method testRunParallelWithHashPartitioningMatchCompactionState.

@Test
public void testRunParallelWithHashPartitioningMatchCompactionState() throws Exception {
    // Hash partitioning is not supported with segment lock yet
    Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(newTuningConfig(new HashedPartitionsSpec(null, 3, null), 2, true))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    for (DataSegment segment : compactedSegments) {
        // Compaction state is expected to be stored because storeCompactionState is enabled by default
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        Assert.assertSame(HashBasedNumberedShardSpec.class, segment.getShardSpec().getClass());
        CompactionState expectedState = new CompactionState(
            new HashedPartitionsSpec(null, 3, null),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedLongSumMetric),
            null,
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec), HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
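
A side note on the expectedLongSumMetric map built above: it mirrors the JSON form of the longSum aggregator that the input data was indexed with. The following is a minimal sketch of that correspondence, not part of the test; it assumes Druid's LongSumAggregatorFactory and a plain Jackson ObjectMapper, and the exact key set may vary with serialization settings.

import java.util.Map;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.query.aggregation.LongSumAggregatorFactory;

public class LongSumMetricMapSketch {
    public static void main(String[] args) {
        // Serializing the aggregator should yield a map along the lines of
        // {type=longSum, name=val, fieldName=val, expression=null}, which is
        // what the test assembles by hand as expectedLongSumMetric.
        Map<?, ?> asMap = new ObjectMapper()
            .convertValue(new LongSumAggregatorFactory("val", "val"), Map.class);
        System.out.println(asMap);
    }
}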

Example 2 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in the druid project (druid-io).

From the class CompactionTaskParallelRunTest, method testRunParallelWithMultiDimensionRangePartitioningWithSingleTask.

@Test
public void testRunParallelWithMultiDimensionRangePartitioningWithSingleTask() throws Exception {
    // Range partitioning is not supported with segment lock yet
    Assume.assumeFalse(lockGranularity == LockGranularity.SEGMENT);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(newTuningConfig(new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false), 1, true))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    for (DataSegment segment : compactedSegments) {
        // Compaction state is expected to be stored because storeCompactionState is enabled by default
        Map<String, String> expectedLongSumMetric = new HashMap<>();
        expectedLongSumMetric.put("type", "longSum");
        expectedLongSumMetric.put("name", "val");
        expectedLongSumMetric.put("fieldName", "val");
        expectedLongSumMetric.put("expression", null);
        Assert.assertSame(DimensionRangeShardSpec.class, segment.getShardSpec().getClass());
        CompactionState expectedState = new CompactionState(
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false),
            new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
            ImmutableList.of(expectedLongSumMetric),
            null,
            compactionTask.getTuningConfig().getIndexSpec().asMap(getObjectMapper()),
            getObjectMapper().readValue(
                getObjectMapper().writeValueAsString(
                    new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, true, ImmutableList.of(segment.getInterval()))
                ),
                Map.class
            )
        );
        Assert.assertEquals(expectedState, segment.getLastCompactionState());
    }
}
Also used: HashMap (java.util.HashMap), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), DimensionRangePartitionsSpec (org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec), DataSegment (org.apache.druid.timeline.DataSegment), UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec), DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec), CompactionState (org.apache.druid.timeline.CompactionState), Map (java.util.Map), ImmutableMap (com.google.common.collect.ImmutableMap), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
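
For reference, the DimensionRangePartitionsSpec used above is the programmatic form of a "range" partitionsSpec in a compaction task spec. Below is a minimal serialization sketch, an illustration only; it assumes a plain Jackson ObjectMapper and that the spec serializes under its registered "range" type name.

import java.util.Arrays;

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec;

public class RangePartitionsSpecSketch {
    public static void main(String[] args) throws Exception {
        // Expected to serialize roughly as:
        // {"type":"range","targetRowsPerSegment":7,"partitionDimensions":["dim1","dim2"],"assumeGrouped":false}
        DimensionRangePartitionsSpec spec =
            new DimensionRangePartitionsSpec(7, null, Arrays.asList("dim1", "dim2"), false);
        System.out.println(new ObjectMapper().writeValueAsString(spec));
    }
}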

Example 3 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in the druid project (druid-io).

From the class CompactionTaskParallelRunTest, method testRunCompactionStateNotStoreIfContextSetToFalse.

@Test
public void testRunCompactionStateNotStoreIfContextSetToFalse() {
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .context(ImmutableMap.of(Tasks.STORE_COMPACTION_STATE_KEY, false))
        .build();
    final Set<DataSegment> compactedSegments = runTask(compactionTask);
    for (DataSegment segment : compactedSegments) {
        Assert.assertSame(
            lockGranularity == LockGranularity.TIME_CHUNK ? NumberedShardSpec.class : NumberedOverwriteShardSpec.class,
            segment.getShardSpec().getClass()
        );
        // Compaction state should be absent because storeCompactionState was set to false in the task context
        Assert.assertNull(segment.getLastCompactionState());
    }
}
Also used: Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), DataSegment (org.apache.druid.timeline.DataSegment), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
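
The context call above is what disables persisting the compaction state. Here is a minimal sketch of building the same context map outside the test harness; it assumes, as the constant's name suggests, that Tasks.STORE_COMPACTION_STATE_KEY resolves to the "storeCompactionState" key of a submitted task spec.

import java.util.Map;

import com.google.common.collect.ImmutableMap;
import org.apache.druid.indexing.common.task.Tasks;

public class CompactionContextSketch {
    public static void main(String[] args) {
        // Equivalent to a task-spec fragment like: "context": {"storeCompactionState": false}
        Map<String, Object> context = ImmutableMap.of(Tasks.STORE_COMPACTION_STATE_KEY, false);
        System.out.println(context);
    }
}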

Example 4 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in the druid project (druid-io).

From the class CompactionTaskParallelRunTest, method testCompactRangeAndDynamicPartitionedSegments.

@Test
public void testCompactRangeAndDynamicPartitionedSegments() {
    runIndexTask(new SingleDimensionPartitionsSpec(2, null, "dim", false), false);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .build();
    final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask));
    Assert.assertEquals(3, intervalToSegments.size());
    Assert.assertEquals(
        ImmutableSet.of(
            Intervals.of("2014-01-01T00/PT1H"),
            Intervals.of("2014-01-01T01/PT1H"),
            Intervals.of("2014-01-01T02/PT1H")
        ),
        intervalToSegments.keySet()
    );
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segmentsInInterval = entry.getValue();
        Assert.assertEquals(1, segmentsInInterval.size());
        final ShardSpec shardSpec = segmentsInInterval.get(0).getShardSpec();
        if (lockGranularity == LockGranularity.TIME_CHUNK) {
            Assert.assertSame(NumberedShardSpec.class, shardSpec.getClass());
            final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) shardSpec;
            Assert.assertEquals(0, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getNumCorePartitions());
        } else {
            Assert.assertSame(NumberedOverwriteShardSpec.class, shardSpec.getClass());
            final NumberedOverwriteShardSpec numberedShardSpec = (NumberedOverwriteShardSpec) shardSpec;
            Assert.assertEquals(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getAtomicUpdateGroupSize());
        }
    }
}
Also used: Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), SingleDimensionPartitionsSpec (org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec), DataSegment (org.apache.druid.timeline.DataSegment), DimensionRangeShardSpec (org.apache.druid.timeline.partition.DimensionRangeShardSpec), ShardSpec (org.apache.druid.timeline.partition.ShardSpec), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec), List (java.util.List), ArrayList (java.util.ArrayList), ImmutableList (com.google.common.collect.ImmutableList), Interval (org.joda.time.Interval), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
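
The branch on lockGranularity above reflects how Druid keeps first-generation (root) partition ids separate from the ids of segments that overwrite them under segment locking. The following is a minimal sketch that only prints the boundary constants; ROOT_GEN_START_PARTITION_ID is an assumed companion constant not referenced by the test itself.

import org.apache.druid.timeline.partition.PartitionIds;

public class PartitionIdRangesSketch {
    public static void main(String[] args) {
        // Root-generation segments (plain NumberedShardSpec) count partition ids up from
        // ROOT_GEN_START_PARTITION_ID, while overwriting segments (NumberedOverwriteShardSpec)
        // start at NON_ROOT_GEN_START_PARTITION_ID, the value the test asserts.
        System.out.println(PartitionIds.ROOT_GEN_START_PARTITION_ID);
        System.out.println(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID);
    }
}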

Example 5 with Builder

Use of org.apache.druid.indexing.common.task.CompactionTask.Builder in the druid project (druid-io).

From the class CompactionTaskParallelRunTest, method testCompactHashAndDynamicPartitionedSegments.

@Test
public void testCompactHashAndDynamicPartitionedSegments() {
    runIndexTask(new HashedPartitionsSpec(null, 2, null), false);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .build();
    final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask));
    Assert.assertEquals(3, intervalToSegments.size());
    Assert.assertEquals(
        ImmutableSet.of(
            Intervals.of("2014-01-01T00/PT1H"),
            Intervals.of("2014-01-01T01/PT1H"),
            Intervals.of("2014-01-01T02/PT1H")
        ),
        intervalToSegments.keySet()
    );
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segmentsInInterval = entry.getValue();
        Assert.assertEquals(1, segmentsInInterval.size());
        final ShardSpec shardSpec = segmentsInInterval.get(0).getShardSpec();
        if (lockGranularity == LockGranularity.TIME_CHUNK) {
            Assert.assertSame(NumberedShardSpec.class, shardSpec.getClass());
            final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) shardSpec;
            Assert.assertEquals(0, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getNumCorePartitions());
        } else {
            Assert.assertSame(NumberedOverwriteShardSpec.class, shardSpec.getClass());
            final NumberedOverwriteShardSpec numberedShardSpec = (NumberedOverwriteShardSpec) shardSpec;
            Assert.assertEquals(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getAtomicUpdateGroupSize());
        }
    }
}
Also used: HashedPartitionsSpec (org.apache.druid.indexer.partitions.HashedPartitionsSpec), Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder), DataSegment (org.apache.druid.timeline.DataSegment), DimensionRangeShardSpec (org.apache.druid.timeline.partition.DimensionRangeShardSpec), ShardSpec (org.apache.druid.timeline.partition.ShardSpec), NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec), HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec), NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec), SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec), List (java.util.List), ArrayList (java.util.ArrayList), ImmutableList (com.google.common.collect.ImmutableList), Interval (org.joda.time.Interval), AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest), Test (org.junit.Test)
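
For reference, the HashedPartitionsSpec(null, 2, null) used to index the input data fixes the shard count rather than a target row count. Below is a minimal serialization sketch, assuming a plain Jackson ObjectMapper and the spec's registered "hashed" type name (an illustration, not part of the test).

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.HashedPartitionsSpec;

public class HashedPartitionsSpecSketch {
    public static void main(String[] args) throws Exception {
        HashedPartitionsSpec spec = new HashedPartitionsSpec(null, 2, null);
        // Expected to serialize roughly as {"type":"hashed","numShards":2,...}
        System.out.println(new ObjectMapper().writeValueAsString(spec));
        System.out.println(spec.getNumShards()); // prints 2
    }
}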

Aggregations

Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 40
Test (org.junit.Test): 40
DataSegment (org.apache.druid.timeline.DataSegment): 29
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 18
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 18
ImmutableList (com.google.common.collect.ImmutableList): 17
ArrayList (java.util.ArrayList): 17
List (java.util.List): 17
TaskStatus (org.apache.druid.indexer.TaskStatus): 15
AbstractParallelIndexSupervisorTaskTest (org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest): 13
NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13
ClientCompactionTaskGranularitySpec (org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec): 12
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 12
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 12
HashMap (java.util.HashMap): 11
Map (java.util.Map): 11
CompactionState (org.apache.druid.timeline.CompactionState): 11
ImmutableMap (com.google.common.collect.ImmutableMap): 8
DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 5
Interval (org.joda.time.Interval): 5