
Example 21 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class CompactionTaskParallelRunTest, method testCompactRangeAndDynamicPartitionedSegments.

@Test
public void testCompactRangeAndDynamicPartitionedSegments() {
    runIndexTask(new SingleDimensionPartitionsSpec(2, null, "dim", false), false);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .build();
    final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask));
    Assert.assertEquals(3, intervalToSegments.size());
    Assert.assertEquals(ImmutableSet.of(Intervals.of("2014-01-01T00/PT1H"), Intervals.of("2014-01-01T01/PT1H"), Intervals.of("2014-01-01T02/PT1H")), intervalToSegments.keySet());
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segmentsInInterval = entry.getValue();
        Assert.assertEquals(1, segmentsInInterval.size());
        final ShardSpec shardSpec = segmentsInInterval.get(0).getShardSpec();
        if (lockGranularity == LockGranularity.TIME_CHUNK) {
            Assert.assertSame(NumberedShardSpec.class, shardSpec.getClass());
            final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) shardSpec;
            Assert.assertEquals(0, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getNumCorePartitions());
        } else {
            Assert.assertSame(NumberedOverwriteShardSpec.class, shardSpec.getClass());
            final NumberedOverwriteShardSpec numberedShardSpec = (NumberedOverwriteShardSpec) shardSpec;
            Assert.assertEquals(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getAtomicUpdateGroupSize());
        }
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) DataSegment(org.apache.druid.timeline.DataSegment) DimensionRangeShardSpec(org.apache.druid.timeline.partition.DimensionRangeShardSpec) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Interval(org.joda.time.Interval) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)
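The time-chunk branch of the test reads two fields off the dynamic shard spec: the segment's position within its time chunk (partitionNum) and the number of core partitions the chunk was created with. For reference, here is a minimal, self-contained sketch (not taken from the test; the class name is made up for illustration) that constructs a NumberedShardSpec directly and reads back the same two getters the test asserts against.

import org.apache.druid.timeline.partition.NumberedShardSpec;

public class NumberedShardSpecSketch {
    public static void main(String[] args) {
        // Dynamic partitioning under time-chunk locking: partition 0 of a
        // time chunk that has a single core partition, as the test expects.
        final NumberedShardSpec shardSpec = new NumberedShardSpec(0, 1);
        // Same getters the test asserts on.
        System.out.println(shardSpec.getPartitionNum());      // 0
        System.out.println(shardSpec.getNumCorePartitions()); // 1
    }
}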

Example 22 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class CompactionTaskParallelRunTest, method testCompactHashAndDynamicPartitionedSegments.

@Test
public void testCompactHashAndDynamicPartitionedSegments() {
    runIndexTask(new HashedPartitionsSpec(null, 2, null), false);
    runIndexTask(null, true);
    final Builder builder = new Builder(DATA_SOURCE, getSegmentCacheManagerFactory(), RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder
        .inputSpec(new CompactionIntervalSpec(INTERVAL_TO_INDEX, null))
        .tuningConfig(AbstractParallelIndexSupervisorTaskTest.DEFAULT_TUNING_CONFIG_FOR_PARALLEL_INDEXING)
        .build();
    final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask));
    Assert.assertEquals(3, intervalToSegments.size());
    Assert.assertEquals(ImmutableSet.of(Intervals.of("2014-01-01T00/PT1H"), Intervals.of("2014-01-01T01/PT1H"), Intervals.of("2014-01-01T02/PT1H")), intervalToSegments.keySet());
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segmentsInInterval = entry.getValue();
        Assert.assertEquals(1, segmentsInInterval.size());
        final ShardSpec shardSpec = segmentsInInterval.get(0).getShardSpec();
        if (lockGranularity == LockGranularity.TIME_CHUNK) {
            Assert.assertSame(NumberedShardSpec.class, shardSpec.getClass());
            final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) shardSpec;
            Assert.assertEquals(0, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getNumCorePartitions());
        } else {
            Assert.assertSame(NumberedOverwriteShardSpec.class, shardSpec.getClass());
            final NumberedOverwriteShardSpec numberedShardSpec = (NumberedOverwriteShardSpec) shardSpec;
            Assert.assertEquals(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, numberedShardSpec.getPartitionNum());
            Assert.assertEquals(1, numberedShardSpec.getAtomicUpdateGroupSize());
        }
    }
}
Also used : HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) DataSegment(org.apache.druid.timeline.DataSegment) DimensionRangeShardSpec(org.apache.druid.timeline.partition.DimensionRangeShardSpec) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) Interval(org.joda.time.Interval) AbstractParallelIndexSupervisorTaskTest(org.apache.druid.indexing.common.task.batch.parallel.AbstractParallelIndexSupervisorTaskTest) Test(org.junit.Test)

Example 23 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class CompactionTaskRunTest, method testRunWithDynamicPartitioning.

@Test
public void testRunWithDynamicPartitioning() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder.interval(Intervals.of("2014-01-01/2014-01-02")).build();
    final Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    final List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(3, segments.size());
    for (int i = 0; i < 3; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1), segments.get(i).getInterval());
        Assert.assertEquals(getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1))), segments.get(i).getLastCompactionState());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(32768, 0, 2, (short) 1, (short) 1), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
        }
    }
    List<String> rowsFromSegment = getCSVFormatRowsFromSegments(segments);
    Assert.assertEquals(TEST_ROWS, rowsFromSegment);
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Test(org.junit.Test)
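Under segment locking the compacted segments instead carry a NumberedOverwriteShardSpec, whose partition id starts at PartitionIds.NON_ROOT_GEN_START_PARTITION_ID (the literal 32768 seen in the example above). The following is a hedged sketch, assuming the same constructor argument order the tests use (partition id, overwritten root-partition range start and end, minor version, atomic update group size); the class name is illustrative only.

import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec;
import org.apache.druid.timeline.partition.PartitionIds;

public class NumberedOverwriteShardSpecSketch {
    public static void main(String[] args) {
        // First non-root-generation partition id, overwriting root partitions
        // [0, 2) with minor version 1 and an atomic update group of size 1,
        // matching the spec the tests expect after compaction.
        final NumberedOverwriteShardSpec spec = new NumberedOverwriteShardSpec(
            PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, 0, 2, (short) 1, (short) 1);
        System.out.println(spec.getPartitionNum());          // 32768
        System.out.println(spec.getAtomicUpdateGroupSize()); // 1
    }
}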

Example 24 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class CompactionTaskRunTest, method testRunIndexAndCompactAtTheSameTimeForDifferentInterval.

@Test
public void testRunIndexAndCompactAtTheSameTimeForDifferentInterval() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder.interval(Intervals.of("2014-01-01T00:00:00/2014-01-02T03:00:00")).build();
    File tmpDir = temporaryFolder.newFolder();
    File tmpFile = File.createTempFile("druid", "index", tmpDir);
    try (BufferedWriter writer = Files.newWriter(tmpFile, StandardCharsets.UTF_8)) {
        writer.write("2014-01-01T03:00:10Z,a,1\n");
        writer.write("2014-01-01T03:00:10Z,b,2\n");
        writer.write("2014-01-01T03:00:10Z,c,3\n");
        writer.write("2014-01-01T04:00:20Z,a,1\n");
        writer.write("2014-01-01T04:00:20Z,b,2\n");
        writer.write("2014-01-01T04:00:20Z,c,3\n");
        writer.write("2014-01-01T05:00:30Z,a,1\n");
        writer.write("2014-01-01T05:00:30Z,b,2\n");
        writer.write("2014-01-01T05:00:30Z,c,3\n");
    }
    IndexTask indexTask = new IndexTask(
        null,
        null,
        IndexTaskTest.createIngestionSpec(
            getObjectMapper(),
            tmpDir,
            DEFAULT_PARSE_SPEC,
            null,
            new UniformGranularitySpec(Granularities.HOUR, Granularities.MINUTE, null),
            IndexTaskTest.createTuningConfig(2, 2, null, 2L, null, false, true),
            false,
            false
        ),
        null
    );
    final Future<Pair<TaskStatus, List<DataSegment>>> compactionFuture = exec.submit(() -> runTask(compactionTask));
    final Future<Pair<TaskStatus, List<DataSegment>>> indexFuture = exec.submit(() -> runTask(indexTask));
    Assert.assertTrue(indexFuture.get().lhs.isSuccess());
    List<DataSegment> segments = indexFuture.get().rhs;
    Assert.assertEquals(6, segments.size());
    for (int i = 0; i < 6; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", 3 + i / 2, 3 + i / 2 + 1), segments.get(i).getInterval());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedShardSpec(i % 2, 0), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(i % 2, 2), segments.get(i).getShardSpec());
        }
    }
    Assert.assertTrue(compactionFuture.get().lhs.isSuccess());
    segments = compactionFuture.get().rhs;
    Assert.assertEquals(3, segments.size());
    for (int i = 0; i < 3; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1), segments.get(i).getInterval());
        Assert.assertEquals(getDefaultCompactionState(Granularities.HOUR, Granularities.MINUTE, ImmutableList.of(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i, i + 1))), segments.get(i).getLastCompactionState());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(new NumberedOverwriteShardSpec(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, 0, 2, (short) 1, (short) 1), segments.get(i).getShardSpec());
        } else {
            Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(i).getShardSpec());
        }
    }
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) DataSegment(org.apache.druid.timeline.DataSegment) BufferedWriter(java.io.BufferedWriter) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) File(java.io.File) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Pair(org.apache.druid.java.util.common.Pair) Test(org.junit.Test)

Example 25 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From class CompactionTaskRunTest, method testCompactionWithFilterInTransformSpec.

@Test
public void testCompactionWithFilterInTransformSpec() throws Exception {
    runIndexTask();
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    // day segmentGranularity
    final CompactionTask compactionTask = builder
        .interval(Intervals.of("2014-01-01/2014-01-02"))
        .granularitySpec(new ClientCompactionTaskGranularitySpec(Granularities.DAY, null, null))
        .transformSpec(new ClientCompactionTaskTransformSpec(new SelectorDimFilter("dim", "a", null)))
        .build();
    Pair<TaskStatus, List<DataSegment>> resultPair = runTask(compactionTask);
    Assert.assertTrue(resultPair.lhs.isSuccess());
    List<DataSegment> segments = resultPair.rhs;
    Assert.assertEquals(1, segments.size());
    Assert.assertEquals(Intervals.of("2014-01-01/2014-01-02"), segments.get(0).getInterval());
    Assert.assertEquals(new NumberedShardSpec(0, 1), segments.get(0).getShardSpec());
    ObjectMapper mapper = new DefaultObjectMapper();
    Map<String, String> expectedLongSumMetric = new HashMap<>();
    expectedLongSumMetric.put("type", "longSum");
    expectedLongSumMetric.put("name", "val");
    expectedLongSumMetric.put("fieldName", "val");
    expectedLongSumMetric.put("expression", null);
    CompactionState expectedCompactionState = new CompactionState(
        new DynamicPartitionsSpec(5000000, Long.MAX_VALUE),
        new DimensionsSpec(DimensionsSpec.getDefaultSchemas(ImmutableList.of("ts", "dim"))),
        ImmutableList.of(expectedLongSumMetric),
        getObjectMapper().readValue(getObjectMapper().writeValueAsString(compactionTask.getTransformSpec()), Map.class),
        mapper.readValue(mapper.writeValueAsString(new IndexSpec()), Map.class),
        mapper.readValue(
            mapper.writeValueAsString(
                new UniformGranularitySpec(
                    Granularities.DAY,
                    Granularities.MINUTE,
                    true,
                    ImmutableList.of(Intervals.of("2014-01-01T00:00:00/2014-01-01T03:00:00"))
                )
            ),
            Map.class
        )
    );
    Assert.assertEquals(expectedCompactionState, segments.get(0).getLastCompactionState());
}
Also used : IndexSpec(org.apache.druid.segment.IndexSpec) HashMap(java.util.HashMap) Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) ClientCompactionTaskTransformSpec(org.apache.druid.client.indexing.ClientCompactionTaskTransformSpec) ClientCompactionTaskGranularitySpec(org.apache.druid.client.indexing.ClientCompactionTaskGranularitySpec) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) CompactionState(org.apache.druid.timeline.CompactionState) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Map(java.util.Map) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Test(org.junit.Test)
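The expected CompactionState in this example is built by round-tripping each spec through JSON into a plain Map, so that it compares equal to the untyped state the task recorded on the segment. Below is a small sketch of that round-trip, assuming only the DynamicPartitionsSpec constructor and DefaultObjectMapper already used in the test; the printed field names in the comment are illustrative, not verified output.

import java.util.Map;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.jackson.DefaultObjectMapper;
import com.fasterxml.jackson.databind.ObjectMapper;

public class CompactionStateMapSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new DefaultObjectMapper();
        // Serialize the expected partitions spec and read it back as an
        // untyped Map, mirroring how the test builds expectedCompactionState.
        final DynamicPartitionsSpec partitionsSpec = new DynamicPartitionsSpec(5000000, Long.MAX_VALUE);
        final Map<?, ?> asMap = mapper.readValue(mapper.writeValueAsString(partitionsSpec), Map.class);
        System.out.println(asMap); // e.g. {type=dynamic, maxRowsPerSegment=5000000, ...}
    }
}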

Aggregations

NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 58
Test (org.junit.Test): 45
DataSegment (org.apache.druid.timeline.DataSegment): 41
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 26
ImmutableList (com.google.common.collect.ImmutableList): 24
List (java.util.List): 24
ArrayList (java.util.ArrayList): 23
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 14
Interval (org.joda.time.Interval): 14
NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13
IOException (java.io.IOException): 12
File (java.io.File): 11
Map (java.util.Map): 11
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 10
HashMap (java.util.HashMap): 10
TaskStatus (org.apache.druid.indexer.TaskStatus): 9
Before (org.junit.Before): 9
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 8
NoopTask (org.apache.druid.indexing.common.task.NoopTask): 8
Task (org.apache.druid.indexing.common.task.Task): 8