Example 26 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class CompactionTaskRunTest, method testRunIndexAndCompactForSameSegmentAtTheSameTime.

@Test
public void testRunIndexAndCompactForSameSegmentAtTheSameTime() throws Exception {
    runIndexTask();
    // make sure that indexTask becomes ready first, then compactionTask becomes ready, then indexTask runs
    final CountDownLatch compactionTaskReadyLatch = new CountDownLatch(1);
    final CountDownLatch indexTaskStartLatch = new CountDownLatch(1);
    final Future<Pair<TaskStatus, List<DataSegment>>> indexFuture = exec.submit(
        () -> runIndexTask(compactionTaskReadyLatch, indexTaskStartLatch, false)
    );
    final Builder builder = new Builder(DATA_SOURCE, segmentCacheManagerFactory, RETRY_POLICY_FACTORY);
    final CompactionTask compactionTask = builder.interval(Intervals.of("2014-01-01T00:00:00/2014-01-02T03:00:00")).build();
    final Future<Pair<TaskStatus, List<DataSegment>>> compactionFuture = exec.submit(() -> {
        compactionTaskReadyLatch.await();
        return runTask(compactionTask, indexTaskStartLatch, null);
    });
    Assert.assertTrue(indexFuture.get().lhs.isSuccess());
    List<DataSegment> segments = indexFuture.get().rhs;
    Assert.assertEquals(6, segments.size());
    for (int i = 0; i < 6; i++) {
        Assert.assertEquals(Intervals.of("2014-01-01T0%d:00:00/2014-01-01T0%d:00:00", i / 2, i / 2 + 1), segments.get(i).getInterval());
        if (lockGranularity == LockGranularity.SEGMENT) {
            Assert.assertEquals(
                new NumberedOverwriteShardSpec(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID + i % 2, 0, 2, (short) 1, (short) 2),
                segments.get(i).getShardSpec()
            );
        } else {
            Assert.assertEquals(new NumberedShardSpec(i % 2, 2), segments.get(i).getShardSpec());
        }
    }
    final Pair<TaskStatus, List<DataSegment>> compactionResult = compactionFuture.get();
    Assert.assertEquals(TaskState.FAILED, compactionResult.lhs.getStatusCode());
}
Also used : Builder(org.apache.druid.indexing.common.task.CompactionTask.Builder) CountDownLatch(java.util.concurrent.CountDownLatch) TaskStatus(org.apache.druid.indexer.TaskStatus) DataSegment(org.apache.druid.timeline.DataSegment) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) ArrayList(java.util.ArrayList) List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Pair(org.apache.druid.java.util.common.Pair) Test(org.junit.Test)
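
The assertions above distinguish the two shard spec types by lock granularity: segment locking yields NumberedOverwriteShardSpec instances in the reserved non-root partition id range, while time-chunk locking yields plain NumberedShardSpec instances. Below is a minimal standalone sketch of that contrast, using only constructors and getters that appear in the tests on this page; the class name and printed values are illustrative, not part of the test.

import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.PartitionIds;

public class ShardSpecContrastSketch {
    public static void main(String[] args) {
        // Time-chunk locking: plain numbered partitions (partitionNum, partitions).
        final NumberedShardSpec numbered = new NumberedShardSpec(0, 2);
        System.out.println(numbered.getPartitionNum());      // 0
        System.out.println(numbered.getNumCorePartitions()); // 2

        // Segment locking: the overwriting generation starts at a reserved
        // partition id offset and records the root partition range it replaces,
        // its minor version, and the size of its atomic update group.
        final NumberedOverwriteShardSpec overwrite = new NumberedOverwriteShardSpec(
            PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, // first non-root partition id
            0,         // startRootPartitionId: replaces root partitions [0, 2)
            2,         // endRootPartitionId (exclusive)
            (short) 1, // minorVersion: first overwrite of this range
            (short) 2  // atomicUpdateGroupSize: two segments swap in together
        );
        // Offset within the reserved non-root id range.
        System.out.println(overwrite.getPartitionNum() - PartitionIds.NON_ROOT_GEN_START_PARTITION_ID); // 0
    }
}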

Example 27 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class IndexTaskTest, method testOverwriteWithSameSegmentGranularity.

@Test
public void testOverwriteWithSameSegmentGranularity() throws Exception {
    final File tmpDir = temporaryFolder.newFolder();
    final File tmpFile = File.createTempFile("druid", "index", tmpDir);
    populateRollupTestData(tmpFile);
    for (int i = 0; i < 2; i++) {
        final IndexTask indexTask = new IndexTask(
            null,
            null,
            createDefaultIngestionSpec(
                jsonMapper,
                tmpDir,
                new UniformGranularitySpec(Granularities.DAY, Granularities.DAY, true, null),
                null,
                createTuningConfig(3, 2, null, 2L, null, false, true),
                false,
                false
            ),
            null
        );
        final List<DataSegment> segments = runTask(indexTask).rhs;
        Assert.assertEquals(5, segments.size());
        final Interval expectedInterval = Intervals.of("2014-01-01T00:00:00.000Z/2014-01-02T00:00:00.000Z");
        for (int j = 0; j < 5; j++) {
            final DataSegment segment = segments.get(j);
            Assert.assertEquals(DATASOURCE, segment.getDataSource());
            Assert.assertEquals(expectedInterval, segment.getInterval());
            if (i == 0) {
                Assert.assertEquals(NumberedShardSpec.class, segment.getShardSpec().getClass());
                Assert.assertEquals(j, segment.getShardSpec().getPartitionNum());
            } else {
                if (lockGranularity == LockGranularity.SEGMENT) {
                    Assert.assertEquals(NumberedOverwriteShardSpec.class, segment.getShardSpec().getClass());
                    final NumberedOverwriteShardSpec numberedOverwriteShardSpec = (NumberedOverwriteShardSpec) segment.getShardSpec();
                    Assert.assertEquals(j + PartitionIds.NON_ROOT_GEN_START_PARTITION_ID, numberedOverwriteShardSpec.getPartitionNum());
                    Assert.assertEquals(1, numberedOverwriteShardSpec.getMinorVersion());
                    Assert.assertEquals(5, numberedOverwriteShardSpec.getAtomicUpdateGroupSize());
                    Assert.assertEquals(0, numberedOverwriteShardSpec.getStartRootPartitionId());
                    Assert.assertEquals(5, numberedOverwriteShardSpec.getEndRootPartitionId());
                } else {
                    Assert.assertEquals(NumberedShardSpec.class, segment.getShardSpec().getClass());
                    final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) segment.getShardSpec();
                    Assert.assertEquals(j, numberedShardSpec.getPartitionNum());
                }
            }
        }
    }
}
Also used : UniformGranularitySpec(org.apache.druid.segment.indexing.granularity.UniformGranularitySpec) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) File(java.io.File) DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval) Test(org.junit.Test)
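
For the second run (i == 1) under segment locking, the assertions pin down the entire overwrite generation for j = 0..4: five segments atomically replace the five root partitions [0, 5) at minor version 1. As a hedged sketch (values taken from the assertions above, class name hypothetical), the expected shard specs can be reconstructed like this:

import org.apache.druid.timeline.partition.NumberedOverwriteShardSpec;
import org.apache.druid.timeline.partition.PartitionIds;

public class OverwriteGenerationSketch {
    public static void main(String[] args) {
        // Five new segments atomically replace the five root segments [0, 5).
        final int atomicUpdateGroupSize = 5;
        for (int j = 0; j < atomicUpdateGroupSize; j++) {
            final NumberedOverwriteShardSpec expected = new NumberedOverwriteShardSpec(
                PartitionIds.NON_ROOT_GEN_START_PARTITION_ID + j,
                0,                         // startRootPartitionId
                atomicUpdateGroupSize,     // endRootPartitionId (exclusive)
                (short) 1,                 // minorVersion of the first overwrite
                (short) atomicUpdateGroupSize
            );
            // Each overwriting segment's id is offset into the non-root range.
            System.out.printf("j=%d partitionNum=%d%n", j, expected.getPartitionNum());
        }
    }
}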

Example 28 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class RangePartitionMultiPhaseParallelIndexingTest, method testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSuccessfullyAppend.

@Test
public void testAppendLinearlyPartitionedSegmentsToHashPartitionedDatasourceSuccessfullyAppend() {
    if (useMultivalueDim) {
        return;
    }
    final int targetRowsPerSegment = NUM_ROW / DIM_FILE_CARDINALITY / NUM_PARTITION;
    final Set<DataSegment> publishedSegments = new HashSet<>();
    publishedSegments.addAll(runTestTask(new SingleDimensionPartitionsSpec(targetRowsPerSegment, null, DIM1, false), TaskState.SUCCESS, false));
    // Append
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
    // And append again
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    publishedSegments.forEach(
        segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
    );
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segments = entry.getValue();
        final List<DataSegment> rangedSegments = segments
                .stream()
                .filter(segment -> segment.getShardSpec().getClass() == SingleDimensionShardSpec.class)
                .collect(Collectors.toList());
        final List<DataSegment> linearSegments = segments
                .stream()
                .filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class)
                .collect(Collectors.toList());
        for (DataSegment rangedSegment : rangedSegments) {
            final SingleDimensionShardSpec rangeShardSpec = (SingleDimensionShardSpec) rangedSegment.getShardSpec();
            for (DataSegment linearSegment : linearSegments) {
                Assert.assertEquals(rangedSegment.getInterval(), linearSegment.getInterval());
                Assert.assertEquals(rangedSegment.getVersion(), linearSegment.getVersion());
                final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
                Assert.assertEquals(rangeShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
                Assert.assertTrue(rangeShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
            }
        }
    }
}
Also used : ArrayListMultimap(com.google.common.collect.ArrayListMultimap) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) DimensionRangeShardSpec(org.apache.druid.timeline.partition.DimensionRangeShardSpec) HashMultimap(com.google.common.collect.HashMultimap) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) Path(java.nio.file.Path) Parameterized(org.junit.runners.Parameterized) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) InputFormat(org.apache.druid.data.input.InputFormat) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) List(java.util.List) ProvideSystemProperty(org.junit.contrib.java.lang.system.ProvideSystemProperty) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Entry(java.util.Map.Entry) StringTuple(org.apache.druid.data.input.StringTuple) Intervals(org.apache.druid.java.util.common.Intervals) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) Multimap(com.google.common.collect.Multimap) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) LinkedHashMap(java.util.LinkedHashMap) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) DimensionRangePartitionsSpec(org.apache.druid.indexer.partitions.DimensionRangePartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Nullable(javax.annotation.Nullable) Before(org.junit.Before) Files(java.nio.file.Files) Matchers(org.hamcrest.Matchers) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) SetMultimap(com.google.common.collect.SetMultimap) File(java.io.File) NullValueHandlingConfig(org.apache.druid.common.config.NullValueHandlingConfig) Rule(org.junit.Rule) SingleDimensionPartitionsSpec(org.apache.druid.indexer.partitions.SingleDimensionPartitionsSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Assert(org.junit.Assert) Collections(java.util.Collections)
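
The pair of assertions at the bottom of the loop encode the append invariant this test checks: a dynamically appended segment inherits the interval's core partition count from the first-generation run, and its partition number always falls after the core set. A small sketch of that invariant with illustrative values (the variable names are not from the test):

import org.apache.druid.timeline.partition.NumberedShardSpec;

public class AppendInvariantSketch {
    public static void main(String[] args) {
        // Suppose the first (range-partitioned) run created 2 core partitions.
        final int numCorePartitions = 2;
        // An appended segment keeps that core count but takes the next free partition number.
        final NumberedShardSpec appended = new NumberedShardSpec(numCorePartitions, numCorePartitions);
        // Mirrors the assertions above: same core count, higher partition number.
        System.out.println(appended.getNumCorePartitions());               // 2
        System.out.println(appended.getPartitionNum() >= numCorePartitions); // true
    }
}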

Example 29 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class HashPartitionMultiPhaseParallelIndexingTest, method testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend.

@Test
public void testAppendLinearlyPartitionedSegmensToHashPartitionedDatasourceSuccessfullyAppend() {
    final Set<DataSegment> publishedSegments = new HashSet<>();
    publishedSegments.addAll(runTestTask(new HashedPartitionsSpec(null, numShards, ImmutableList.of("dim1", "dim2")), TaskState.SUCCESS, false));
    // Append
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(5, null), TaskState.SUCCESS, true));
    // And append again
    publishedSegments.addAll(runTestTask(new DynamicPartitionsSpec(10, null), TaskState.SUCCESS, true));
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    publishedSegments.forEach(
        segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment)
    );
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final List<DataSegment> segments = entry.getValue();
        final List<DataSegment> hashedSegments = segments
                .stream()
                .filter(segment -> segment.getShardSpec().getClass() == HashBasedNumberedShardSpec.class)
                .collect(Collectors.toList());
        final List<DataSegment> linearSegments = segments
                .stream()
                .filter(segment -> segment.getShardSpec().getClass() == NumberedShardSpec.class)
                .collect(Collectors.toList());
        for (DataSegment hashedSegment : hashedSegments) {
            final HashBasedNumberedShardSpec hashShardSpec = (HashBasedNumberedShardSpec) hashedSegment.getShardSpec();
            for (DataSegment linearSegment : linearSegments) {
                Assert.assertEquals(hashedSegment.getInterval(), linearSegment.getInterval());
                Assert.assertEquals(hashedSegment.getVersion(), linearSegment.getVersion());
                final NumberedShardSpec numberedShardSpec = (NumberedShardSpec) linearSegment.getShardSpec();
                Assert.assertEquals(hashShardSpec.getNumCorePartitions(), numberedShardSpec.getNumCorePartitions());
                Assert.assertTrue(hashShardSpec.getPartitionNum() < numberedShardSpec.getPartitionNum());
            }
        }
    }
}
Also used : Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) Intervals(org.apache.druid.java.util.common.Intervals) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) RunWith(org.junit.runner.RunWith) HashMap(java.util.HashMap) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) Interval(org.joda.time.Interval) CSVParseSpec(org.apache.druid.data.input.impl.CSVParseSpec) ImmutableList(com.google.common.collect.ImmutableList) Map(java.util.Map) DynamicPartitionsSpec(org.apache.druid.indexer.partitions.DynamicPartitionsSpec) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) Parameterized(org.junit.runners.Parameterized) Nullable(javax.annotation.Nullable) HashPartitionFunction(org.apache.druid.timeline.partition.HashPartitionFunction) Before(org.junit.Before) ParseSpec(org.apache.druid.data.input.impl.ParseSpec) DateTimes(org.apache.druid.java.util.common.DateTimes) ScanResultValue(org.apache.druid.query.scan.ScanResultValue) Files(java.nio.file.Files) InputFormat(org.apache.druid.data.input.InputFormat) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CsvInputFormat(org.apache.druid.data.input.impl.CsvInputFormat) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) HashedPartitionsSpec(org.apache.druid.indexer.partitions.HashedPartitionsSpec) Test(org.junit.Test) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) LockGranularity(org.apache.druid.indexing.common.LockGranularity) File(java.io.File) StandardCharsets(java.nio.charset.StandardCharsets) TaskState(org.apache.druid.indexer.TaskState) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) Writer(java.io.Writer) Entry(java.util.Map.Entry) Assert(org.junit.Assert)
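
Examples 28 and 29 share the same grouping idiom: bucket published segments by interval, then split each bucket by the concrete shard spec class. Pulled out as a hedged standalone helper (the class and method names here are hypothetical, not from either test):

import java.util.List;
import java.util.stream.Collectors;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.ShardSpec;

public class ShardSpecFilterSketch {
    // Returns the segments whose shard spec is exactly the given class,
    // matching the filter used in both tests above.
    static List<DataSegment> withShardSpec(List<DataSegment> segments, Class<? extends ShardSpec> clazz) {
        return segments.stream()
                       .filter(segment -> segment.getShardSpec().getClass() == clazz)
                       .collect(Collectors.toList());
    }
}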

Example 30 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class IngestSegmentFirehoseFactoryTest, method buildSegment.

private static DataSegment buildSegment(Integer shardNumber) {
    Preconditions.checkArgument(shardNumber < MAX_SHARD_NUMBER);
    Preconditions.checkArgument(shardNumber >= 0);
    return new DataSegment(
        DATA_SOURCE_NAME,
        Intervals.ETERNITY,
        DATA_SOURCE_VERSION,
        ImmutableMap.of("type", "local", "path", PERSIST_DIR.getAbsolutePath()),
        ImmutableList.of(DIM_NAME),
        ImmutableList.of(METRIC_LONG_NAME, METRIC_FLOAT_NAME),
        new NumberedShardSpec(shardNumber, MAX_SHARD_NUMBER),
        BINARY_VERSION,
        0L
    );
}
Also used : DataSegment(org.apache.druid.timeline.DataSegment) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec)
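
The nine-argument DataSegment constructor used in buildSegment is also handy for quick fixtures outside this test. A self-contained variant with illustrative values (the data source, version, load-spec path, and binary version below are all hypothetical):

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import org.apache.druid.java.util.common.Intervals;
import org.apache.druid.timeline.DataSegment;
import org.apache.druid.timeline.partition.NumberedShardSpec;

public class SegmentFixtureSketch {
    public static void main(String[] args) {
        final DataSegment segment = new DataSegment(
            "example_datasource",                                     // dataSource (hypothetical)
            Intervals.ETERNITY,                                       // interval
            "version_1",                                              // version (hypothetical)
            ImmutableMap.of("type", "local", "path", "/tmp/segment"), // loadSpec (hypothetical path)
            ImmutableList.of("dim"),                                  // dimensions
            ImmutableList.of("met"),                                  // metrics
            new NumberedShardSpec(0, 1),                              // shard 0 of 1 core partition
            9,                                                        // binaryVersion (hypothetical)
            0L                                                        // size in bytes
        );
        System.out.println(segment.getShardSpec().getPartitionNum()); // 0
    }
}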

Aggregations

NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 58
Test (org.junit.Test): 45
DataSegment (org.apache.druid.timeline.DataSegment): 41
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 26
ImmutableList (com.google.common.collect.ImmutableList): 24
List (java.util.List): 24
ArrayList (java.util.ArrayList): 23
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 14
Interval (org.joda.time.Interval): 14
NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13
IOException (java.io.IOException): 12
File (java.io.File): 11
Map (java.util.Map): 11
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 10
HashMap (java.util.HashMap): 10
TaskStatus (org.apache.druid.indexer.TaskStatus): 9
Before (org.junit.Before): 9
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 8
NoopTask (org.apache.druid.indexing.common.task.NoopTask): 8
Task (org.apache.druid.indexing.common.task.Task): 8