Example 31 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class NewestSegmentFirstPolicyBenchmark, method setup().

@Setup(Level.Trial)
public void setup() {
    compactionConfigs = new HashMap<>();
    for (int i = 0; i < numDataSources; i++) {
        final String dataSource = DATA_SOURCE_PREFIX + i;
        compactionConfigs.put(dataSource, new DataSourceCompactionConfig(dataSource, 0, inputSegmentSizeBytes, null, null, null, null, null, null, null, null, null));
    }
    List<DataSegment> segments = new ArrayList<>();
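    // For each datasource, generate numDayIntervalsPerDataSource consecutive
    // day intervals starting from a random year, with
    // numPartitionsPerDayInterval segments per day.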
    for (int i = 0; i < numDataSources; i++) {
        final String dataSource = DATA_SOURCE_PREFIX + i;
        final int startYear = ThreadLocalRandom.current().nextInt(2000, 2040);
        DateTime date = DateTimes.of(startYear, 1, 1, 0, 0);
        for (int j = 0; j < numDayIntervalsPerDataSource; j++, date = date.plusDays(1)) {
            for (int k = 0; k < numPartitionsPerDayInterval; k++) {
                final ShardSpec shardSpec = new NumberedShardSpec(numPartitionsPerDayInterval, k);
                final DataSegment segment = new DataSegment(dataSource, new Interval(date, date.plusDays(1)), "version", null, ImmutableList.of(), ImmutableList.of(), shardSpec, 0, segmentSizeBytes);
                segments.add(segment);
            }
        }
    }
    dataSources = DataSourcesSnapshot.fromUsedSegments(segments, ImmutableMap.of()).getUsedSegmentsTimelinesPerDataSource();
}
Also used: ArrayList(java.util.ArrayList) DataSegment(org.apache.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval) Setup(org.openjdk.jmh.annotations.Setup)
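
For orientation, a minimal standalone sketch of the shard spec used throughout these examples, assuming the (partitionNum, partitions) parameter order visible in Example 32's NumberedShardSpec(nextRootGenPartitionId++, 0); the datasource name and sizes below are illustrative:

// A hedged sketch: building one day's complete core partition set of
// numbered segments. Names and values here are illustrative only.
final int numPartitions = 4;
final DateTime day = DateTimes.of(2020, 1, 1, 0, 0);
final List<DataSegment> daySegments = new ArrayList<>();
for (int k = 0; k < numPartitions; k++) {
    // Partition k of a core partition set of size numPartitions.
    final ShardSpec shardSpec = new NumberedShardSpec(k, numPartitions);
    daySegments.add(new DataSegment("sketchDataSource", new Interval(day, day.plusDays(1)), "version", null, ImmutableList.of(), ImmutableList.of(), shardSpec, 0, 1024));
}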

Example 32 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class VersionedIntervalTimelineBenchmark, method setup().

@Setup
public void setup() {
    final int numNewRootGenSegmentsAfterCompaction = (int) (numInitialRootGenSegmentsPerInterval * NEW_ROOT_GEN_SEGMENTS_RATIO_AFTER_COMPACTION);
    final int numCompactedSegments = (int) (numInitialRootGenSegmentsPerInterval * COMPACTED_SEGMENTS_RATIO_TO_INITIAL_SEGMENTS);
    intervals = Lists.newArrayList(segmentGranularity.getDefaultGranularity().getIterable(TOTAL_INTERVAL));
    segments = new ArrayList<>(intervals.size() * numInitialRootGenSegmentsPerInterval);
    Map<Interval, Integer> nextRootGenPartitionIds = Maps.newHashMapWithExpectedSize(intervals.size());
    Map<Interval, Integer> nextNonRootGenPartitionIds = Maps.newHashMapWithExpectedSize(intervals.size());
    Map<Interval, Short> nextMinorVersions = Maps.newHashMapWithExpectedSize(intervals.size());
    DateTime majorVersion = DateTimes.nowUtc();
    for (Interval interval : intervals) {
        majorVersion = majorVersion.plus(1);
        int nextRootGenPartitionId = 0;
        int nextNonRootGenPartitionId = PartitionIds.NON_ROOT_GEN_START_PARTITION_ID;
        // Generate root generation segments
        for (int i = 0; i < numInitialRootGenSegmentsPerInterval; i++) {
            segments.add(newSegment(interval, majorVersion.toString(), new NumberedShardSpec(nextRootGenPartitionId++, 0)));
        }
        for (int i = 0; i < numNonRootGenerations; i++) {
            if (!useSegmentLock) {
                majorVersion = majorVersion.plus(1);
                nextRootGenPartitionId = 0;
            }
            // Compacted segments
            for (int j = 0; j < numCompactedSegments; j++) {
                if (useSegmentLock) {
                    segments.add(newSegment(interval, majorVersion.toString(), new NumberedOverwriteShardSpec(nextNonRootGenPartitionId++, 0, nextRootGenPartitionId, (short) (i + 1), (short) numCompactedSegments)));
                } else {
                    segments.add(newSegment(interval, majorVersion.toString(), new NumberedShardSpec(nextRootGenPartitionId++, 0)));
                }
            }
            // New segments
            for (int j = 0; j < numNewRootGenSegmentsAfterCompaction; j++) {
                segments.add(newSegment(interval, majorVersion.toString(), new NumberedShardSpec(nextRootGenPartitionId++, 0)));
            }
        }
        nextRootGenPartitionIds.put(interval, nextRootGenPartitionId);
        nextNonRootGenPartitionIds.put(interval, nextNonRootGenPartitionId);
        nextMinorVersions.put(interval, (short) (numNonRootGenerations + 1));
    }
    timeline = VersionedIntervalTimeline.forSegments(segments);
    newSegments = new ArrayList<>(200);
    // Generate new appending segments
    for (int i = 0; i < 100; i++) {
        final Interval interval = intervals.get(ThreadLocalRandom.current().nextInt(intervals.size()));
        final int rootPartitionId = nextRootGenPartitionIds.get(interval);
        newSegments.add(newSegment(interval, majorVersion.toString(), new NumberedShardSpec(rootPartitionId, 0)));
        nextRootGenPartitionIds.put(interval, rootPartitionId + 1);
    }
    // Generate overwriting segments
    if (!useSegmentLock) {
        majorVersion = majorVersion.plus(1);
        nextRootGenPartitionIds.keySet().forEach(interval -> nextRootGenPartitionIds.put(interval, 0));
    }
    final List<Interval> intervalCopies = new ArrayList<>(intervals);
    for (int i = 0; i < 100 && !intervalCopies.isEmpty(); i++) {
        final Interval interval = intervalCopies.remove(ThreadLocalRandom.current().nextInt(intervalCopies.size()));
        int rootPartitionId = nextRootGenPartitionIds.remove(interval);
        int nonRootPartitionId = nextNonRootGenPartitionIds.remove(interval);
        final short minorVersion = nextMinorVersions.remove(interval);
        for (int j = 0; j < numCompactedSegments; j++) {
            if (useSegmentLock) {
                newSegments.add(newSegment(interval, majorVersion.toString(), new NumberedOverwriteShardSpec(nonRootPartitionId++, 0, rootPartitionId, minorVersion, (short) numCompactedSegments)));
            } else {
                newSegments.add(newSegment(interval, majorVersion.toString(), new NumberedShardSpec(rootPartitionId++, 0)));
            }
        }
    }
}
Also used: ArrayList(java.util.ArrayList) DateTime(org.joda.time.DateTime) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval) Setup(org.openjdk.jmh.annotations.Setup)
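
The setup above only builds the timeline state; the @Benchmark methods themselves are not part of this excerpt. As a hedged sketch of the kind of operation such a benchmark would measure, a lookup over the built timeline returns the visible (non-overshadowed) chunks per interval:

// Assumed usage sketch; the actual @Benchmark methods are not shown here.
for (Interval interval : intervals) {
    // lookup() resolves overshadowing and returns only the visible chunks.
    final List<TimelineObjectHolder<String, DataSegment>> holders = timeline.lookup(interval);
    for (TimelineObjectHolder<String, DataSegment> holder : holders) {
        for (PartitionChunk<DataSegment> chunk : holder.getObject()) {
            // Each chunk holds one visible DataSegment.
            final DataSegment visibleSegment = chunk.getObject();
        }
    }
}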

Example 33 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class IngestSegmentFirehoseFactoryTest, method testGetUniqueDimensionsAndMetrics().

@Test
public void testGetUniqueDimensionsAndMetrics() {
    final int numSegmentsPerPartitionChunk = 5;
    final int numPartitionChunksPerTimelineObject = 10;
    final int numSegments = numSegmentsPerPartitionChunk * numPartitionChunksPerTimelineObject;
    final Interval interval = Intervals.of("2017-01-01/2017-01-02");
    final String version = "1";
    final List<TimelineObjectHolder<String, DataSegment>> timelineSegments = new ArrayList<>();
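    // Build numPartitionChunksPerTimelineObject timeline holders; segments in
    // holder i carry dimensions dim{i}..dim{i+4} and metrics met{i}..met{i+4},
    // so later holders introduce the higher-numbered names.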
    for (int i = 0; i < numPartitionChunksPerTimelineObject; i++) {
        final List<PartitionChunk<DataSegment>> chunks = new ArrayList<>();
        for (int j = 0; j < numSegmentsPerPartitionChunk; j++) {
            final List<String> dims = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "dim" + suffix).collect(Collectors.toList());
            final List<String> metrics = IntStream.range(i, i + numSegmentsPerPartitionChunk).mapToObj(suffix -> "met" + suffix).collect(Collectors.toList());
            final DataSegment segment = new DataSegment("ds", interval, version, ImmutableMap.of(), dims, metrics, new NumberedShardSpec(numPartitionChunksPerTimelineObject, i), 1, 1);
            final PartitionChunk<DataSegment> partitionChunk = new NumberedPartitionChunk<>(i, numPartitionChunksPerTimelineObject, segment);
            chunks.add(partitionChunk);
        }
        final TimelineObjectHolder<String, DataSegment> timelineHolder = new TimelineObjectHolder<>(interval, version, new PartitionHolder<>(chunks));
        timelineSegments.add(timelineHolder);
    }
    final String[] expectedDims = new String[] { "dim9", "dim10", "dim11", "dim12", "dim13", "dim8", "dim7", "dim6", "dim5", "dim4", "dim3", "dim2", "dim1", "dim0" };
    final String[] expectedMetrics = new String[] { "met9", "met10", "met11", "met12", "met13", "met8", "met7", "met6", "met5", "met4", "met3", "met2", "met1", "met0" };
    Assert.assertEquals(Arrays.asList(expectedDims), ReingestionTimelineUtils.getUniqueDimensions(timelineSegments, null));
    Assert.assertEquals(Arrays.asList(expectedMetrics), ReingestionTimelineUtils.getUniqueMetrics(timelineSegments));
}
Also used: Module(com.google.inject.Module) SegmentCacheManagerFactory(org.apache.druid.indexing.common.SegmentCacheManagerFactory) Arrays(java.util.Arrays) NumberedPartitionChunk(org.apache.druid.timeline.partition.NumberedPartitionChunk) IndexSpec(org.apache.druid.segment.IndexSpec) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) SimpleModule(com.fasterxml.jackson.databind.module.SimpleModule) JodaUtils(org.apache.druid.java.util.common.JodaUtils) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) LocalDataSegmentPuller(org.apache.druid.segment.loading.LocalDataSegmentPuller) Task(org.apache.druid.indexing.common.task.Task) After(org.junit.After) Map(java.util.Map) ExpressionTransform(org.apache.druid.segment.transform.ExpressionTransform) FileUtils(org.apache.druid.java.util.common.FileUtils) Parameterized(org.junit.runners.Parameterized) RetryPolicyConfig(org.apache.druid.indexing.common.RetryPolicyConfig) AfterClass(org.junit.AfterClass) ImmutableSet(com.google.common.collect.ImmutableSet) LocalLoadSpec(org.apache.druid.segment.loading.LocalLoadSpec) RetryPolicyFactory(org.apache.druid.indexing.common.RetryPolicyFactory) ImmutableMap(com.google.common.collect.ImmutableMap) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Collection(java.util.Collection) Segments(org.apache.druid.indexing.overlord.Segments) StringUtils(org.apache.druid.java.util.common.StringUtils) Set(java.util.Set) CombiningFirehoseFactory(org.apache.druid.segment.realtime.firehose.CombiningFirehoseFactory) InputRowParser(org.apache.druid.data.input.impl.InputRowParser) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) Collectors(java.util.stream.Collectors) GuiceInjectors(org.apache.druid.guice.GuiceInjectors) TestUtils(org.apache.druid.indexing.common.TestUtils) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexerSQLMetadataStorageCoordinator(org.apache.druid.metadata.IndexerSQLMetadataStorageCoordinator) InputRow(org.apache.druid.data.input.InputRow) NoopTask(org.apache.druid.indexing.common.task.NoopTask) Firehose(org.apache.druid.data.input.Firehose) PartitionHolder(org.apache.druid.timeline.partition.PartitionHolder) List(java.util.List) GuiceAnnotationIntrospector(org.apache.druid.guice.GuiceAnnotationIntrospector) ServiceEmitter(org.apache.druid.java.util.emitter.service.ServiceEmitter) DataSegment(org.apache.druid.timeline.DataSegment) SegmentHandoffNotifierFactory(org.apache.druid.segment.handoff.SegmentHandoffNotifierFactory) TaskStorage(org.apache.druid.indexing.overlord.TaskStorage) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Logger(org.apache.druid.java.util.common.logger.Logger) IntStream(java.util.stream.IntStream) DoubleSumAggregatorFactory(org.apache.druid.query.aggregation.DoubleSumAggregatorFactory) BeforeClass(org.junit.BeforeClass) Intervals(org.apache.druid.java.util.common.Intervals) TaskStorageConfig(org.apache.druid.indexing.common.config.TaskStorageConfig) RunWith(org.junit.runner.RunWith) AnnotationIntrospectorPair(com.fasterxml.jackson.databind.introspect.AnnotationIntrospectorPair) TimeAndDimsParseSpec(org.apache.druid.data.input.impl.TimeAndDimsParseSpec) ArrayList(java.util.ArrayList) HashSet(java.util.HashSet) PartitionChunk(org.apache.druid.timeline.partition.PartitionChunk) Interval(org.joda.time.Interval) ColumnHolder(org.apache.druid.segment.column.ColumnHolder) ImmutableList(com.google.common.collect.ImmutableList) Binder(com.google.inject.Binder) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) CoordinatorClient(org.apache.druid.client.coordinator.CoordinatorClient) IndexMergerV9(org.apache.druid.segment.IndexMergerV9) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) NoopServiceEmitter(org.apache.druid.server.metrics.NoopServiceEmitter) Before(org.junit.Before) ReingestionTimelineUtils(org.apache.druid.indexing.common.ReingestionTimelineUtils) FirehoseFactory(org.apache.druid.data.input.FirehoseFactory) TaskLockbox(org.apache.druid.indexing.overlord.TaskLockbox) MapInputRowParser(org.apache.druid.data.input.impl.MapInputRowParser) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) JSONParseSpec(org.apache.druid.data.input.impl.JSONParseSpec) TimelineObjectHolder(org.apache.druid.timeline.TimelineObjectHolder) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) IOException(java.io.IOException) EasyMock(org.easymock.EasyMock) File(java.io.File) TestHelper(org.apache.druid.segment.TestHelper) Rule(org.junit.Rule) HeapMemoryTaskStorage(org.apache.druid.indexing.overlord.HeapMemoryTaskStorage) Paths(java.nio.file.Paths) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) IndexIO(org.apache.druid.segment.IndexIO) GuiceInjectableValues(org.apache.druid.guice.GuiceInjectableValues) TemporaryFolder(org.junit.rules.TemporaryFolder)
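
The expected arrays encode the rule under test: dimension and metric names from newer timeline entries take precedence, so dim9..dim13 (from the last holder) come first, followed by dim8 down to dim0. A hedged, illustrative sketch of that de-duplication rule, using java.util.LinkedHashSet (the real logic lives in ReingestionTimelineUtils.getUniqueDimensions):

// Illustrative only: walk per-holder name lists from newest to oldest and
// keep the first occurrence of each name, preserving insertion order.
static List<String> uniqueNewestFirst(List<List<String>> namesOldestFirst) {
    final Set<String> unique = new LinkedHashSet<>();
    for (int i = namesOldestFirst.size() - 1; i >= 0; i--) {
        unique.addAll(namesOldestFirst.get(i));
    }
    return new ArrayList<>(unique);
}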

Example 34 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class SinglePhaseParallelIndexingTest, method assertShardSpec().

private void assertShardSpec(ParallelIndexSupervisorTask task, LockGranularity actualLockGranularity, boolean appendToExisting, Collection<DataSegment> originalSegmentsIfAppend) {
    final Collection<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
    if (!appendToExisting && actualLockGranularity == LockGranularity.TIME_CHUNK) {
        // Initial write
        final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(segments);
        for (List<DataSegment> segmentsPerInterval : intervalToSegments.values()) {
            for (DataSegment segment : segmentsPerInterval) {
                Assert.assertSame(NumberedShardSpec.class, segment.getShardSpec().getClass());
                final NumberedShardSpec shardSpec = (NumberedShardSpec) segment.getShardSpec();
                Assert.assertEquals(segmentsPerInterval.size(), shardSpec.getNumCorePartitions());
            }
        }
    } else {
        // Append or initial write with segment lock
        final Map<Interval, List<DataSegment>> intervalToOriginalSegments = SegmentUtils.groupSegmentsByInterval(originalSegmentsIfAppend);
        for (DataSegment segment : segments) {
            Assert.assertSame(NumberedShardSpec.class, segment.getShardSpec().getClass());
            final NumberedShardSpec shardSpec = (NumberedShardSpec) segment.getShardSpec();
            final List<DataSegment> originalSegmentsInInterval = intervalToOriginalSegments.get(segment.getInterval());
            final int expectedNumCorePartitions = originalSegmentsInInterval == null || originalSegmentsInInterval.isEmpty() ? 0 : originalSegmentsInInterval.get(0).getShardSpec().getNumCorePartitions();
            Assert.assertEquals(expectedNumCorePartitions, shardSpec.getNumCorePartitions());
        }
    }
}
Also used: List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) DataSegment(org.apache.druid.timeline.DataSegment) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval)
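
A minimal sketch of the invariant checked in the time-chunk initial-write branch above: every segment written to an interval reports the interval's full segment count as its core partition count (values illustrative):

// Sketch: an initial time-chunk write of three segments yields a core
// partition set of size three on each shard spec.
final int numCorePartitions = 3;
for (int k = 0; k < numCorePartitions; k++) {
    final NumberedShardSpec shardSpec = new NumberedShardSpec(k, numCorePartitions);
    Assert.assertEquals(numCorePartitions, shardSpec.getNumCorePartitions());
}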

Example 35 with NumberedShardSpec

Use of org.apache.druid.timeline.partition.NumberedShardSpec in project druid by druid-io.

From the class SinglePhaseParallelIndexingTest, method assertShardSpecAfterOverwrite().

private void assertShardSpecAfterOverwrite(ParallelIndexSupervisorTask task, LockGranularity actualLockGranularity) {
    final Collection<DataSegment> segments = getIndexingServiceClient().getPublishedSegments(task);
    final Map<Interval, List<DataSegment>> intervalToSegments = SegmentUtils.groupSegmentsByInterval(segments);
    if (actualLockGranularity != LockGranularity.SEGMENT) {
        // Check the core partition set in the shardSpec
        for (List<DataSegment> segmentsPerInterval : intervalToSegments.values()) {
            for (DataSegment segment : segmentsPerInterval) {
                Assert.assertSame(NumberedShardSpec.class, segment.getShardSpec().getClass());
                final NumberedShardSpec shardSpec = (NumberedShardSpec) segment.getShardSpec();
                Assert.assertEquals(segmentsPerInterval.size(), shardSpec.getNumCorePartitions());
            }
        }
    } else {
        for (List<DataSegment> segmentsPerInterval : intervalToSegments.values()) {
            for (DataSegment segment : segmentsPerInterval) {
                Assert.assertSame(NumberedOverwriteShardSpec.class, segment.getShardSpec().getClass());
                final NumberedOverwriteShardSpec shardSpec = (NumberedOverwriteShardSpec) segment.getShardSpec();
                Assert.assertEquals(segmentsPerInterval.size(), shardSpec.getAtomicUpdateGroupSize());
            }
        }
    }
}
Also used: List(java.util.List) ImmutableList(com.google.common.collect.ImmutableList) NumberedOverwriteShardSpec(org.apache.druid.timeline.partition.NumberedOverwriteShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) Interval(org.joda.time.Interval)
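
For the segment-lock branch, a hedged sketch of the shard spec this helper asserts on, using the five-argument constructor seen in Example 32 (all values illustrative):

// Sketch: two segments overwriting root partitions [0, 3) at minor
// version 1 form an atomic update group of size 2.
final short atomicUpdateGroupSize = 2;
for (int k = 0; k < atomicUpdateGroupSize; k++) {
    final NumberedOverwriteShardSpec shardSpec = new NumberedOverwriteShardSpec(PartitionIds.NON_ROOT_GEN_START_PARTITION_ID + k, 0, 3, (short) 1, atomicUpdateGroupSize);
    Assert.assertEquals(atomicUpdateGroupSize, shardSpec.getAtomicUpdateGroupSize());
}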

Aggregations

NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec): 58 uses
Test (org.junit.Test): 45 uses
DataSegment (org.apache.druid.timeline.DataSegment): 41 uses
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec): 26 uses
ImmutableList (com.google.common.collect.ImmutableList): 24 uses
List (java.util.List): 24 uses
ArrayList (java.util.ArrayList): 23 uses
Builder (org.apache.druid.indexing.common.task.CompactionTask.Builder): 14 uses
Interval (org.joda.time.Interval): 14 uses
NumberedOverwriteShardSpec (org.apache.druid.timeline.partition.NumberedOverwriteShardSpec): 13 uses
IOException (java.io.IOException): 12 uses
File (java.io.File): 11 uses
Map (java.util.Map): 11 uses
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper): 10 uses
HashMap (java.util.HashMap): 10 uses
TaskStatus (org.apache.druid.indexer.TaskStatus): 9 uses
Before (org.junit.Before): 9 uses
DimensionsSpec (org.apache.druid.data.input.impl.DimensionsSpec): 8 uses
NoopTask (org.apache.druid.indexing.common.task.NoopTask): 8 uses
Task (org.apache.druid.indexing.common.task.Task): 8 uses