Search in sources:

Example 1 with ShardSpec

Use of org.apache.druid.timeline.partition.ShardSpec in project druid by druid-io.

From the class NewestSegmentFirstPolicyTest, the method createTimeline:

private static VersionedIntervalTimeline<String, DataSegment> createTimeline(SegmentGenerateSpec... specs) {
    List<DataSegment> segments = new ArrayList<>();
    final String version = DateTimes.nowUtc().toString();
    final List<SegmentGenerateSpec> orderedSpecs = Arrays.asList(specs);
    orderedSpecs.sort(Comparator.comparing(s -> s.totalInterval, Comparators.intervalsByStartThenEnd().reversed()));
    for (SegmentGenerateSpec spec : orderedSpecs) {
        Interval remainingInterval = spec.totalInterval;
        while (!Intervals.isEmpty(remainingInterval)) {
            final Interval segmentInterval;
            if (remainingInterval.toDuration().isLongerThan(spec.segmentPeriod.toStandardDuration())) {
                // Interval(Period, end) anchors a segmentPeriod-long interval at the end of the remaining interval.
                segmentInterval = new Interval(spec.segmentPeriod, remainingInterval.getEnd());
            } else {
                segmentInterval = remainingInterval;
            }
            for (int i = 0; i < spec.numSegmentsPerShard; i++) {
                final ShardSpec shardSpec = new NumberedShardSpec(i, spec.numSegmentsPerShard);
                final DataSegment segment = new DataSegment(
                    DATA_SOURCE, segmentInterval, spec.version == null ? version : spec.version,
                    null, ImmutableList.of(), ImmutableList.of(), shardSpec,
                    spec.lastCompactionState, 0, spec.segmentSize
                );
                segments.add(segment);
            }
            remainingInterval = SegmentCompactionUtil.removeIntervalFromEnd(remainingInterval, segmentInterval);
        }
    }
    return VersionedIntervalTimeline.forSegments(segments);
}
Also used: DateTimeZone(org.joda.time.DateTimeZone) Arrays(java.util.Arrays) Comparators(org.apache.druid.java.util.common.guava.Comparators) Partitions(org.apache.druid.timeline.Partitions) UserCompactionTaskDimensionsConfig(org.apache.druid.server.coordinator.UserCompactionTaskDimensionsConfig) IndexSpec(org.apache.druid.segment.IndexSpec) CompactionState(org.apache.druid.timeline.CompactionState) DataSourceCompactionConfig(org.apache.druid.server.coordinator.DataSourceCompactionConfig) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) SelectorDimFilter(org.apache.druid.query.filter.SelectorDimFilter) PeriodGranularity(org.apache.druid.java.util.common.granularity.PeriodGranularity) Map(java.util.Map) Assertions(org.assertj.core.api.Assertions) TypeReference(com.fasterxml.jackson.core.type.TypeReference) ConciseBitmapSerdeFactory(org.apache.druid.segment.data.ConciseBitmapSerdeFactory) DateTimes(org.apache.druid.java.util.common.DateTimes) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) ImmutableSet(com.google.common.collect.ImmutableSet) ImmutableMap(com.google.common.collect.ImmutableMap) TimeZone(java.util.TimeZone) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) ClientCompactionTaskQueryTuningConfig(org.apache.druid.client.indexing.ClientCompactionTaskQueryTuningConfig) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) Collectors(java.util.stream.Collectors) UserCompactionTaskTransformConfig(org.apache.druid.server.coordinator.UserCompactionTaskTransformConfig) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) List(java.util.List) DataSegment(org.apache.druid.timeline.DataSegment) TransformSpec(org.apache.druid.segment.transform.TransformSpec) Iterables(com.google.common.collect.Iterables) InjectableValues(com.fasterxml.jackson.databind.InjectableValues) Intervals(org.apache.druid.java.util.common.Intervals) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) ImmutableList(com.google.common.collect.ImmutableList) UserCompactionTaskGranularityConfig(org.apache.druid.server.coordinator.UserCompactionTaskGranularityConfig) PartitionsSpec(org.apache.druid.indexer.partitions.PartitionsSpec) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) Period(org.joda.time.Period) VersionedIntervalTimeline(org.apache.druid.timeline.VersionedIntervalTimeline) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) Test(org.junit.Test) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) Granularities(org.apache.druid.java.util.common.granularity.Granularities) NullHandling(org.apache.druid.common.config.NullHandling) Preconditions(com.google.common.base.Preconditions) Assert(org.junit.Assert) Comparator(java.util.Comparator) Collections(java.util.Collections)
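
The inner loop above emits one NumberedShardSpec per segment in a time chunk, pairing a partition id with the size of the core partition set. A minimal standalone sketch of that pattern, with a hypothetical class name and assuming the Druid artifact that provides org.apache.druid.timeline.partition is on the classpath:

import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;

import java.util.ArrayList;
import java.util.List;

public class NumberedShardSpecSketch {
    public static void main(String[] args) {
        // Mirrors spec.numSegmentsPerShard in the test above.
        final int numSegmentsPerShard = 3;
        final List<ShardSpec> shardSpecs = new ArrayList<>();
        for (int i = 0; i < numSegmentsPerShard; i++) {
            // partitionNum = i, core partition set size = numSegmentsPerShard
            shardSpecs.add(new NumberedShardSpec(i, numSegmentsPerShard));
        }
        // Prints "0 of 3", "1 of 3", "2 of 3".
        for (ShardSpec spec : shardSpecs) {
            System.out.println(spec.getPartitionNum() + " of " + numSegmentsPerShard);
        }
    }
}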

Example 2 with ShardSpec

Use of org.apache.druid.timeline.partition.ShardSpec in project druid by druid-io.

From the class CompactSegmentsTest, the method createSegment:

private DataSegment createSegment(String dataSource, int startDay, boolean beforeNoon, int partition) {
    final ShardSpec shardSpec = shardSpecFactory.apply(partition, 2);
    final Interval interval = beforeNoon
        ? Intervals.of(StringUtils.format("2017-01-%02dT00:00:00/2017-01-%02dT12:00:00", startDay + 1, startDay + 1))
        : Intervals.of(StringUtils.format("2017-01-%02dT12:00:00/2017-01-%02dT00:00:00", startDay + 1, startDay + 2));
    return new DataSegment(dataSource, interval, "version", null, ImmutableList.of(), ImmutableList.of(), shardSpec, 0, 10L);
}
Also used: DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Interval(org.joda.time.Interval)
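
The shardSpecFactory applied on the first line is a field of the test and is not shown here; the imports above suggest the test parameterizes over hash-based, numbered, and single-dimension shard specs. A plausible shape for it, as a hypothetical sketch only, is a BiFunction from (partitionNum, numPartitions) to a ShardSpec:

import java.util.function.BiFunction;

import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;

public class ShardSpecFactorySketch {
    // Hypothetical stand-in for the test's shardSpecFactory field.
    private static final BiFunction<Integer, Integer, ShardSpec> shardSpecFactory =
        NumberedShardSpec::new;

    public static void main(String[] args) {
        final ShardSpec shardSpec = shardSpecFactory.apply(0, 2); // partition 0 of 2
        System.out.println(shardSpec.getPartitionNum());          // 0
    }
}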

Example 3 with ShardSpec

Use of org.apache.druid.timeline.partition.ShardSpec in project druid by druid-io.

From the class SegmentPublisherHelper, the method annotateShardSpec:

/**
 * This method fills missing information in the shard spec if necessary when publishing segments.
 *
 * - When time chunk lock is used, the non-appending task should set the proper size of the core partitions for
 *   dynamically-partitioned segments. See {@link #annotateCorePartitionSetSizeFn}.
 * - When segment lock is used, the overwriting task should set the proper size of the atomic update group.
 *   See {@link #annotateAtomicUpdateGroupFn}.
 */
static Set<DataSegment> annotateShardSpec(Set<DataSegment> segments) {
    final Map<Interval, List<DataSegment>> intervalToSegments = new HashMap<>();
    segments.forEach(segment -> intervalToSegments.computeIfAbsent(segment.getInterval(), k -> new ArrayList<>()).add(segment));
    for (Entry<Interval, List<DataSegment>> entry : intervalToSegments.entrySet()) {
        final Interval interval = entry.getKey();
        final List<DataSegment> segmentsPerInterval = entry.getValue();
        final ShardSpec firstShardSpec = segmentsPerInterval.get(0).getShardSpec();
        final boolean anyMismatch = segmentsPerInterval.stream()
            .anyMatch(segment -> segment.getShardSpec().getClass() != firstShardSpec.getClass());
        if (anyMismatch) {
            throw new ISE("Mismatched shardSpecs in interval[%s] for segments[%s]", interval, segmentsPerInterval);
        }
        final Function<DataSegment, DataSegment> annotateFn;
        if (firstShardSpec instanceof OverwriteShardSpec) {
            annotateFn = annotateAtomicUpdateGroupFn(segmentsPerInterval.size());
        } else if (firstShardSpec instanceof BuildingShardSpec) {
            // sanity check
            // BuildingShardSpec is used in non-appending mode. In this mode,
            // the segments in each interval should have contiguous partitionIds,
            // so that they can be queryable (see PartitionHolder.isComplete()).
            int expectedCorePartitionSetSize = segmentsPerInterval.size();
            int actualCorePartitionSetSize = Math.toIntExact(
                segmentsPerInterval.stream()
                    .filter(segment -> segment.getShardSpec().getPartitionNum() < expectedCorePartitionSetSize)
                    .count()
            );
            if (expectedCorePartitionSetSize != actualCorePartitionSetSize) {
                LOG.errorSegments(segmentsPerInterval, "Cannot publish segments due to incomplete time chunk");
                throw new ISE(
                    "Cannot publish segments due to incomplete time chunk for interval[%s]. "
                    + "Expected [%s] segments in the core partition, but only [%s] segments are found. "
                    + "See task logs for more details about these segments.",
                    interval,
                    expectedCorePartitionSetSize,
                    actualCorePartitionSetSize
                );
            }
            annotateFn = annotateCorePartitionSetSizeFn(expectedCorePartitionSetSize);
        } else if (firstShardSpec instanceof BucketNumberedShardSpec) {
            throw new ISE("Cannot publish segments with shardSpec[%s]", firstShardSpec);
        } else {
            annotateFn = null;
        }
        if (annotateFn != null) {
            intervalToSegments.put(interval, segmentsPerInterval.stream().map(annotateFn).collect(Collectors.toList()));
        }
    }
    return intervalToSegments.values().stream().flatMap(Collection::stream).collect(Collectors.toSet());
}
Also used: Logger(org.apache.druid.java.util.common.logger.Logger) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) BuildingShardSpec(org.apache.druid.timeline.partition.BuildingShardSpec) Collection(java.util.Collection) OverwriteShardSpec(org.apache.druid.timeline.partition.OverwriteShardSpec) Set(java.util.Set) ISE(org.apache.druid.java.util.common.ISE) HashMap(java.util.HashMap) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) ArrayList(java.util.ArrayList) Interval(org.joda.time.Interval) List(java.util.List) Map(java.util.Map) DataSegment(org.apache.druid.timeline.DataSegment) Entry(java.util.Map.Entry) BucketNumberedShardSpec(org.apache.druid.timeline.partition.BucketNumberedShardSpec)
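
The core-partition sanity check can be read in isolation: a time chunk of n segments is publishable only when exactly n of its partition ids fall below n, which for distinct ids means 0..n-1 are all present. A minimal sketch of that check, with plain ints standing in for DataSegment and ShardSpec and hypothetical names throughout:

import java.util.Arrays;
import java.util.List;

public class CorePartitionCheckSketch {
    // True only when the count of ids below n equals n (n = list size); with
    // distinct ids this is exactly the contiguity condition enforced above.
    static boolean isCompleteCorePartitionSet(List<Integer> partitionIds) {
        final int expected = partitionIds.size();
        final long actual = partitionIds.stream().filter(id -> id < expected).count();
        return actual == expected;
    }

    public static void main(String[] args) {
        System.out.println(isCompleteCorePartitionSet(Arrays.asList(0, 1, 2))); // true
        System.out.println(isCompleteCorePartitionSet(Arrays.asList(0, 2, 3))); // false: id 1 is missing
    }
}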

Example 4 with ShardSpec

Use of org.apache.druid.timeline.partition.ShardSpec in project druid by druid-io.

From the class CachingClusteredClientTest, the method populateTimeline:

private List<Map<DruidServer, ServerExpectations>> populateTimeline(
    List<Interval> queryIntervals,
    List<List<Iterable<Result<Object>>>> expectedResults,
    int numQueryIntervals,
    List<Object> mocks
) {
    timeline = new VersionedIntervalTimeline<>(Ordering.natural());
    final List<Map<DruidServer, ServerExpectations>> serverExpectationList = new ArrayList<>();
    for (int k = 0; k < numQueryIntervals + 1; ++k) {
        final int numChunks = expectedResults.get(k).size();
        final TreeMap<DruidServer, ServerExpectations> serverExpectations = new TreeMap<>();
        serverExpectationList.add(serverExpectations);
        for (int j = 0; j < numChunks; ++j) {
            DruidServer lastServer = servers[random.nextInt(servers.length)];
            serverExpectations.computeIfAbsent(lastServer, server -> new ServerExpectations(server, makeMock(mocks, QueryRunner.class)));
            final ShardSpec shardSpec;
            if (numChunks == 1) {
                shardSpec = new SingleDimensionShardSpec("dimAll", null, null, 0, 1);
            } else {
                String start = null;
                String end = null;
                if (j > 0) {
                    start = String.valueOf(j);
                }
                if (j + 1 < numChunks) {
                    end = String.valueOf(j + 1);
                }
                shardSpec = new SingleDimensionShardSpec("dim" + k, start, end, j, numChunks);
            }
            DataSegment mockSegment = makeMock(mocks, DataSegment.class);
            ServerExpectation<Object> expectation = new ServerExpectation<>(
                SegmentId.dummy(StringUtils.format("%s_%s", k, j)), // interval/chunk
                queryIntervals.get(k),
                mockSegment,
                shardSpec,
                expectedResults.get(k).get(j)
            );
            serverExpectations.get(lastServer).addExpectation(expectation);
            EasyMock.expect(mockSegment.getSize()).andReturn(0L).anyTimes();
            EasyMock.replay(mockSegment);
            ServerSelector selector = new ServerSelector(expectation.getSegment(), new HighestPriorityTierSelectorStrategy(new RandomServerSelectorStrategy()));
            selector.addServerAndUpdateSegment(new QueryableDruidServer(lastServer, null), selector.getSegment());
            EasyMock.reset(mockSegment);
            EasyMock.expect(mockSegment.getShardSpec()).andReturn(shardSpec).anyTimes();
            timeline.add(queryIntervals.get(k), String.valueOf(k), shardSpec.createChunk(selector));
        }
    }
    return serverExpectationList;
}
Also used: ArrayList(java.util.ArrayList) QueryableDruidServer(org.apache.druid.client.selector.QueryableDruidServer) TreeMap(java.util.TreeMap) DataSegment(org.apache.druid.timeline.DataSegment) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) NoneShardSpec(org.apache.druid.timeline.partition.NoneShardSpec) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) ServerSelector(org.apache.druid.client.selector.ServerSelector) HighestPriorityTierSelectorStrategy(org.apache.druid.client.selector.HighestPriorityTierSelectorStrategy) Map(java.util.Map) ImmutableMap(com.google.common.collect.ImmutableMap) HashMap(java.util.HashMap) RandomServerSelectorStrategy(org.apache.druid.client.selector.RandomServerSelectorStrategy)
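
In the multi-chunk branch, the start and end arguments produce a contiguous range partitioning of the dimension: the first chunk is unbounded below, the last is unbounded above, and adjacent chunks share a boundary value. A minimal sketch of the layout for three chunks, with a hypothetical class name and assuming the same SingleDimensionShardSpec constructor the test uses:

import org.apache.druid.timeline.partition.SingleDimensionShardSpec;

public class RangeShardSketch {
    public static void main(String[] args) {
        final int numChunks = 3;
        for (int j = 0; j < numChunks; j++) {
            // Same start/end logic as the loop above.
            final String start = j > 0 ? String.valueOf(j) : null;
            final String end = j + 1 < numChunks ? String.valueOf(j + 1) : null;
            final SingleDimensionShardSpec spec =
                new SingleDimensionShardSpec("dim0", start, end, j, numChunks);
            System.out.println("chunk " + spec.getPartitionNum() + ": [" + start + ", " + end + ")");
        }
        // Prints: chunk 0: [null, 1), chunk 1: [1, 2), chunk 2: [2, null)
    }
}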

Example 5 with ShardSpec

Use of org.apache.druid.timeline.partition.ShardSpec in project druid by druid-io.

From the class IndexGeneratorJobTest, the method loadShardSpecs:

private Map<Long, List<HadoopyShardSpec>> loadShardSpecs(String partitionType, Object[][][] shardInfoForEachShard) {
    Map<Long, List<HadoopyShardSpec>> shardSpecs = new TreeMap<>(DateTimeComparator.getInstance());
    int shardCount = 0;
    int segmentNum = 0;
    for (Interval segmentGranularity : config.getSegmentGranularIntervals()) {
        List<ShardSpec> specs = constructShardSpecFromShardInfo(partitionType, shardInfoForEachShard[segmentNum++]);
        List<HadoopyShardSpec> actualSpecs = Lists.newArrayListWithExpectedSize(specs.size());
        for (ShardSpec spec : specs) {
            actualSpecs.add(new HadoopyShardSpec(spec, shardCount++));
        }
        shardSpecs.put(segmentGranularity.getStartMillis(), actualSpecs);
    }
    return shardSpecs;
}
Also used: List(java.util.List) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) TreeMap(java.util.TreeMap) ShardSpec(org.apache.druid.timeline.partition.ShardSpec) NumberedShardSpec(org.apache.druid.timeline.partition.NumberedShardSpec) HashBasedNumberedShardSpec(org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) SingleDimensionShardSpec(org.apache.druid.timeline.partition.SingleDimensionShardSpec) Interval(org.joda.time.Interval)
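
The wrapping step is the point of the method: HadoopyShardSpec pairs each per-interval ShardSpec with a globally increasing shard number, keeping shard numbers unique across intervals. A minimal sketch of that counter pattern, with a hypothetical class name and assuming druid-indexing-hadoop (where org.apache.druid.indexer.HadoopyShardSpec lives) is on the classpath:

import org.apache.druid.indexer.HadoopyShardSpec;
import org.apache.druid.timeline.partition.NumberedShardSpec;
import org.apache.druid.timeline.partition.ShardSpec;

import java.util.ArrayList;
import java.util.List;

public class HadoopyShardSpecSketch {
    public static void main(String[] args) {
        int shardCount = 0; // global across all intervals, like the method above
        final List<HadoopyShardSpec> actualSpecs = new ArrayList<>();
        for (int interval = 0; interval < 2; interval++) {
            for (int i = 0; i < 2; i++) {
                final ShardSpec spec = new NumberedShardSpec(i, 2);
                actualSpecs.add(new HadoopyShardSpec(spec, shardCount++));
            }
        }
        actualSpecs.forEach(s -> System.out.println(s.getShardNum())); // 0, 1, 2, 3
    }
}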

Aggregations

ShardSpec (org.apache.druid.timeline.partition.ShardSpec) 20
Interval (org.joda.time.Interval) 13
ArrayList (java.util.ArrayList) 8
DataSegment (org.apache.druid.timeline.DataSegment) 8
NumberedShardSpec (org.apache.druid.timeline.partition.NumberedShardSpec) 8
List (java.util.List) 7
HashBasedNumberedShardSpec (org.apache.druid.timeline.partition.HashBasedNumberedShardSpec) 6
SingleDimensionShardSpec (org.apache.druid.timeline.partition.SingleDimensionShardSpec) 6
Test (org.junit.Test) 6
ImmutableList (com.google.common.collect.ImmutableList) 5
HashMap (java.util.HashMap) 5
ImmutableMap (com.google.common.collect.ImmutableMap) 3
Map (java.util.Map) 3
TreeMap (java.util.TreeMap) 3
ISE (org.apache.druid.java.util.common.ISE) 3
BucketNumberedShardSpec (org.apache.druid.timeline.partition.BucketNumberedShardSpec) 3
DateTime (org.joda.time.DateTime) 3
ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper) 2
IOException (java.io.IOException) 2
Collectors (java.util.stream.Collectors) 2