
Example 11 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

From class TaskSerdeTest, method testIndexTaskwithResourceSerde:

@Test
public void testIndexTaskwithResourceSerde() throws Exception {
    final IndexTask task = new IndexTask(
        null,
        // availability group "rofl", required capacity 2
        new TaskResource("rofl", 2),
        new IndexIngestionSpec(
            new DataSchema(
                "foo",
                new TimestampSpec(null, null, null),
                DimensionsSpec.EMPTY,
                new AggregatorFactory[] { new DoubleSumAggregatorFactory("met", "met") },
                new UniformGranularitySpec(Granularities.DAY, null, ImmutableList.of(Intervals.of("2010-01-01/P2D"))),
                null
            ),
            new IndexIOConfig(null, new LocalInputSource(new File("lol"), "rofl"), new NoopInputFormat(), true, false),
            // dynamic partitioning: maxRowsPerSegment = 10000, no maxTotalRows limit
            new IndexTuningConfig(null, null, null, 10, null, null, null, null, null, null, new DynamicPartitionsSpec(10000, null), indexSpec, null, 3, false, null, null, null, null, null, null, null, null, null)
        ),
        null
    );
    for (final Module jacksonModule : new FirehoseModule().getJacksonModules()) {
        jsonMapper.registerModule(jacksonModule);
    }
    final String json = jsonMapper.writeValueAsString(task);
    // Just want to run the clock a bit to make sure the task id doesn't change
    Thread.sleep(100);
    final IndexTask task2 = (IndexTask) jsonMapper.readValue(json, Task.class);
    Assert.assertEquals("foo", task.getDataSource());
    Assert.assertEquals(task.getId(), task2.getId());
    Assert.assertEquals(2, task.getTaskResource().getRequiredCapacity());
    Assert.assertEquals("rofl", task.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getTaskResource().getRequiredCapacity(), task2.getTaskResource().getRequiredCapacity());
    Assert.assertEquals(task.getTaskResource().getAvailabilityGroup(), task2.getTaskResource().getAvailabilityGroup());
    Assert.assertEquals(task.getGroupId(), task2.getGroupId());
    Assert.assertEquals(task.getDataSource(), task2.getDataSource());
    Assert.assertTrue(task.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
    Assert.assertTrue(task2.getIngestionSchema().getIOConfig().getInputSource() instanceof LocalInputSource);
}
Also used : IndexIOConfig(org.apache.druid.indexing.common.task.IndexTask.IndexIOConfig) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) LocalInputSource(org.apache.druid.data.input.impl.LocalInputSource) DataSchema(org.apache.druid.segment.indexing.DataSchema) IndexIngestionSpec(org.apache.druid.indexing.common.task.IndexTask.IndexIngestionSpec) UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) FirehoseModule(org.apache.druid.guice.FirehoseModule) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) NoopInputFormat(org.apache.druid.data.input.impl.NoopInputFormat) Module(com.fasterxml.jackson.databind.Module) File(java.io.File) ParallelIndexTuningConfig(org.apache.druid.indexing.common.task.batch.parallel.ParallelIndexTuningConfig) IndexTuningConfig(org.apache.druid.indexing.common.task.IndexTask.IndexTuningConfig) Test(org.junit.Test)
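The same round-trip can be exercised on the partitions spec alone. A minimal sketch, assuming Jackson's ObjectMapper and that DynamicPartitionsSpec is registered as the "dynamic" subtype of PartitionsSpec; the class name DynamicPartitionsSpecSerdeSketch is hypothetical:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;

public class DynamicPartitionsSpecSerdeSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        // maxRowsPerSegment = 10000, no maxTotalRows limit, as in the task above
        final DynamicPartitionsSpec spec = new DynamicPartitionsSpec(10000, null);
        final String json = mapper.writeValueAsString(spec);
        // Reading back through the PartitionsSpec interface exercises the
        // polymorphic "type" discriminator (assumed to resolve to "dynamic")
        final PartitionsSpec fromJson = mapper.readValue(json, PartitionsSpec.class);
        System.out.println(json);
        System.out.println(spec.equals(fromJson));  // expected: true
    }
}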

Example 12 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

From class RangePartitionMultiPhaseParallelIndexingTest, method testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSuccessfullyAppend:

@Test
public void testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSuccessfullyAppend() {
    if (useMultivalueDim) {
        return;
    }
    final int targetRowsPerSegment = NUM_ROW / DIM_FILE_CARDINALITY / NUM_PARTITION;
    final Set<DataSegment> publishedSegments = new HashSet<>();
    publishedSegments.addAll(runTestTask(new SingleDimensionPartitionsSpec(targetRowsPerSegment, null, DIM1, false), TaskState.SUCCESS, false));
    // Append
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
    // And append again
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    publishedSegments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segments = entry.getValue();
        final List<DataSegment> rangedSegments = segments.stream()
            .filter(segment -> segment.getShardSpec().getClass() == SingleDimensionShardSpec.class)
            .collect(Collectors.toList());
        final List<DataSegment> linearSegments = segments.stream()
            .filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class)
            .collect(Collectors.toList());
        for (DataSegment rangedSegment : rangedSegments) {
            final SingleDimensionShardSpec rangeShardSpec = (SingleDimensionShardSpec) rangedSegment.getShardSpec();
            for (DataSegment linearSegment : linearSegments) {
                Assert.assertEquals(rangedSegment.getInterval(), linearSegment.getInterval());
                Assert.assertEquals(rangedSegment.getVersion(), linearSegment.getVersion());
                final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
                Assert.assertEquals(rangeShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
                Assert.assertTrue(rangeShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
            }
        }
    }
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionRangeShardSpec(org.apache.druid.timeline.partition.DimensionRangeShardSpec) HashMultimap(com.google.common.collect.HashMultimap) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) Path(java.nio.file.Path) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) InputFormat(org.apache.druid.data.input.InputFormat) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) List(java.util.List) ProvideSystemProperty(org.junit.contrib.java.lang.system.ProvideSystemProperty) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Entry(java.util.Map.Entry) StringTuple(org.apache.druid.data.input.StringTuple) Intervals(org.apache.druid.java.util.common.Intervals) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) Before(org.junit.Before) Files(java.nio.file.Files) Matchers(org.hamcrest.Matchers) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) SetMultimap(com.google.common.collect.SetMultimap) File(java.io.File) NullValueHandlingConfig(org.apache.druid.common.config.NullValueHandlingConfig) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Assert(org.junit.Assert) Collections(java.util.Collections)
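The interval-grouping idiom in the middle of this test recurs across these examples. A self-contained sketch of just that step; the helper name SegmentGrouping is hypothetical, and DataSegment is used only for its getInterval() accessor:

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.druid.timeline.DataSegment;
import org.joda.time.Interval;

final class SegmentGrouping {
    // Bucket published segments by interval so per-interval invariants
    // (same version, disjoint partition numbers, ...) can be asserted
    static Map<Interval, List<DataSegment>> byInterval(Iterable<DataSegment> segments) {
        final Map<Interval, List<DataSegment>> grouped = new HashMap<>();
        for (DataSegment segment : segments) {
            grouped.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment);
        }
        return grouped;
    }
}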

Example 13 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

From class ParallelIndexTuningConfigTest, method testSerdeWithMaxNumSubTasks:

@Test
public void testSerdeWithMaxNumSubTasks() throws IOException {
    final int maxNumSubTasks = 250;
    final ParallelIndexTuningConfig tuningConfig = new ParallelIndexTuningConfig(
        null, null, null, 10, 1000L, null, null, null, null,
        new DynamicPartitionsSpec(100, 100L),  // maxRowsPerSegment = 100, maxTotalRows = 100
        new IndexSpec(new RoaringBitmapSerdeFactory(true), CompressionStrategy.UNCOMPRESSED, CompressionStrategy.LZF, LongEncodingStrategy.LONGS),
        new IndexSpec(), 1, false, true, 10000L,
        OffHeapMemorySegmentWriteOutMediumFactory.instance(), maxNumSubTasks,
        null, 100, 20L, new Duration(3600), 128,
        null, null, false, null, null, null, null, null);
    final byte[] json = mapper.writeValueAsBytes(tuningConfig);
    final ParallelIndexTuningConfig fromJson = (ParallelIndexTuningConfig) mapper.readValue(json, TuningConfig.class);
    Assert.assertEquals(fromJson, tuningConfig);
}
Also used : TuningConfig(org.apache.druid.segment.indexing.TuningConfig) IndexSpec(org.apache.druid.segment.IndexSpec) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) RoaringBitmapSerdeFactory(org.apache.druid.segment.data.RoaringBitmapSerdeFactory) Duration(org.joda.time.Duration) Test(org.junit.Test)
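The spec inside this tuning config can also be read from a hand-written JSON fragment, which is the form it takes in an ingestion spec. A minimal sketch, assuming the "dynamic" type name on PartitionsSpec; the class name DynamicSpecFromJsonSketch is hypothetical:

import com.fasterxml.jackson.databind.ObjectMapper;
import org.apache.druid.indexer.partitions.DynamicPartitionsSpec;
import org.apache.druid.indexer.partitions.PartitionsSpec;

public class DynamicSpecFromJsonSketch {
    public static void main(String[] args) throws Exception {
        final ObjectMapper mapper = new ObjectMapper();
        // Mirrors new DynamicPartitionsSpec(100, 100L) from the test above
        final String json = "{\"type\":\"dynamic\",\"maxRowsPerSegment\":100,\"maxTotalRows\":100}";
        final DynamicPartitionsSpec spec =
            (DynamicPartitionsSpec) mapper.readValue(json, PartitionsSpec.class);
        System.out.println(spec.getMaxRowsPerSegment() + " / " + spec.getMaxTotalRows());
    }
}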

Example 14 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

From class PartialCompactionTest, method testPartialCompactRangeAndDynamicPartitionedSegments:

@Test
public void testPartialCompactRangeAndDynamicPartitionedSegments() {
    final Map<Interval, List<DataSegment>> rangePartitionedSegments = SegmentUtils.groupSegmentsByInterval(
        runTestTask(new SingleDimensionPartitionsSpec(10, null, "dim1", false), TaskState.SUCCESS, false));
    final Map<Interval, List<DataSegment>> linearlyPartitionedSegments = SegmentUtils.groupSegmentsByInterval(
        runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
    // Pick half of each partition list to compact together
    rangePartitionedSegments.values().forEach(
        segmentsInInterval -> segmentsInInterval.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum())));
    linearlyPartitionedSegments.values().forEach(
        segmentsInInterval -> segmentsInInterval.sort(Comparator.comparing(segment -> segment.getShardSpec().getPartitionNum())));
    final List<DataSegment> segmentsToCompact = new ArrayList<>();
    for (List<DataSegment> segmentsInInterval : rangePartitionedSegments.values()) {
        segmentsToCompact.addAll(segmentsInInterval.subList(segmentsInInterval.size() / 2, segmentsInInterval.size()));
    }
    for (List<DataSegment> segmentsInInterval : linearlyPartitionedSegments.values()) {
        segmentsToCompact.addAll(segmentsInInterval.subList(0, segmentsInInterval.size() / 2));
    }
    final CompactionTask compactionTask = newCompactionTaskBuilder()
        .inputSpec(SpecificSegmentsSpec.fromSegments(segmentsToCompact))
        .tuningConfig(newTuningConfig(new DynamicPartitionsSpec(20, null), 2, false))
        .build();
    final Map<Interval, List<DataSegment>> compactedSegments = SegmentUtils.groupSegmentsByInterval(runTask(compactionTask, TaskState.SUCCESS));
    for (List<DataSegment> segmentsInInterval : compactedSegments.values()) {
        final int expectedAtomicUpdateGroupSize = segmentsInInterval.size();
        for (DataSegment segment : segmentsInInterval) {
            Assert.assertEquals(expectedAtomicUpdateGroupSize, segment.getShardSpec().getAtomicUpdateGroupSize());
        }
    }
}
Also used : DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) CompactionTask(org.apache.druid.indexing.common.task.CompactionTask) ArrayList(java.util.ArrayList) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Interval(org.joda.time.Interval) Test(org.junit.Test)
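The half-and-half selection above can be factored into a small helper. A sketch of that step only; the name HalfPicker is hypothetical, and only getShardSpec().getPartitionNum() is assumed on DataSegment:

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;
import org.apache.druid.timeline.DataSegment;

final class HalfPicker {
    // Upper half of the range-partitioned segments per interval
    static List<DataSegment> upperHalf(List<DataSegment> segments) {
        segments.sort(Comparator.comparing(s -> s.getShardSpec().getPartitionNum()));
        return new ArrayList<>(segments.subList(segments.size() / 2, segments.size()));
    }

    // Lower half of the dynamically partitioned segments per interval,
    // so the compaction input mixes both shard-spec families
    static List<DataSegment> lowerHalf(List<DataSegment> segments) {
        segments.sort(Comparator.comparing(s -> s.getShardSpec().getPartitionNum()));
        return new ArrayList<>(segments.subList(0, segments.size() / 2));
    }
}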

Example 15 with DynamicPartitionsSpec

Use of org.apache.druid.indexer.partitions.DynamicPartitionsSpec in project druid by druid-io.

From class HashPartitionMultiPhaseParallelIndexingTest, method testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend:

@Test
public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend() {
    final Set<DataSegment> publishedSegments = new HashSet<>();
    publishedSegments.addAll(runTestTask(new HashedPartitionsSpec(null, numShards, ImmutableList.of("dim1", "dim2")), TaskState.SUCCESS, false));
    // Append
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
    // And append again
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    publishedSegments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segments = entry.getValue();
        final List<DataSegment> hashedSegments = segments.stream()
            .filter(segment -> segment.getShardSpec().getClass() == HashBasedNumberedShardSpec.class)
            .collect(Collectors.toList());
        final List<DataSegment> linearSegments = segments.stream()
            .filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class)
            .collect(Collectors.toList());
        for (DataSegment hashedSegment : hashedSegments) {
            final HashBasedNumberedShardSpec hashShardSpec = (HashBasedNumberedShardSpec) hashedSegment.getShardSpec();
            for (DataSegment linearSegment : linearSegments) {
                Assert.assertEquals(hashedSegment.getInterval(), linearSegment.getInterval());
                Assert.assertEquals(hashedSegment.getVersion(), linearSegment.getVersion());
                final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
                Assert.assertEquals(hashShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
                Assert.assertTrue(hashShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Parameterized(org.junit.runners.Parameterized) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) DateTimes(org.apache.druid.java.util.common.DateTimes) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) Files(java.nio.file.Files) InputFormat(org.apache.druid.data.input.InputFormat) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Entry(java.util.Map.Entry) Assert(org.junit.Assert)
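The assertions above encode one invariant: appended NumberedShardSpec segments keep the first generation's core partition count and take strictly larger partition numbers. A minimal sketch, assuming NumberedShardSpec's two-argument (partitionNum, partitions) constructor where the second argument records the core partition count; the class name AppendInvariantSketch is hypothetical:

import org.apache.druid.timeline.partition.NumberedShardSpec;

public class AppendInvariantSketch {
    public static void main(String[] args) {
        // Suppose the first-generation (hash/range) ingestion produced 2 core partitions
        final int corePartitions = 2;
        // An appended segment lands after the core set: partitionNum >= corePartitions
        final NumberedShardSpec appended = new NumberedShardSpec(2, corePartitions);
        System.out.println(appended.getNumCorePartitions());  // 2
        System.out.println(appended.getPartitionNum());       // 2, the first non-core slot
    }
}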

Aggregations

DynamicPartitionsSpec (org.apache.druid.indexer.partitions.DynamicPartitionsSpec): 52
Test (org.junit.Test): 34
IndexSpec (org.apache.druid.segment.IndexSpec): 19
List (java.util.List): 15
Map (java.util.Map): 15
ImmutableList (com.google.common.collect.ImmutableList): 13
StringUtils (org.apache.druid.java.util.common.StringUtils): 13
DataSegment (org.apache.druid.timeline.DataSegment): 13
ImmutableMap (com.google.common.collect.ImmutableMap): 12
HashMap (java.util.HashMap): 11
Function (java.util.function.Function): 11
Pair (org.apache.druid.java.util.common.Pair): 11
Closeable (java.io.Closeable): 10
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 10
RoaringBitmapSerdeFactory (org.apache.druid.segment.data.RoaringBitmapSerdeFactory): 10
Duration (org.joda.time.Duration): 10
Interval (org.joda.time.Interval): 10
ArrayList (java.util.ArrayList): 9
UUID (java.util.UUID): 9
UniformGranularitySpec (org.apache.druid.segment.indexing.granularity.UniformGranularitySpec): 9