
Example 11 with Segment

Use of org.apache.druid.segment.Segment in project druid by druid-io.

From class IncrementalIndexTest, method testSingleThreadedIndexingAndQuery:

@Test
public void testSingleThreadedIndexingAndQuery() throws Exception {
    final int dimensionCount = 5;
    final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>();
    ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        ingestAggregatorFactories.add(new LongSumAggregatorFactory(StringUtils.format("sumResult%s", i), StringUtils.format("Dim_%s", i)));
        ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(StringUtils.format("doubleSumResult%s", i), StringUtils.format("Dim_%s", i)));
    }
    final IncrementalIndex index = indexCreator.createIndex((Object) ingestAggregatorFactories.toArray(new AggregatorFactory[0]));
    final long timestamp = System.currentTimeMillis();
    final int rows = 50;
    // ingest the same data twice so that some rollup/merging actually happens
    for (int i = 0; i < rows; i++) {
        index.add(getLongRow(timestamp + i, dimensionCount));
    }
    for (int i = 0; i < rows; i++) {
        index.add(getLongRow(timestamp + i, dimensionCount));
    }
    // run a timeseries query on the index and verify results
    final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>();
    queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        queryAggregatorFactories.add(new LongSumAggregatorFactory(StringUtils.format("sumResult%s", i), StringUtils.format("sumResult%s", i)));
        queryAggregatorFactories.add(new DoubleSumAggregatorFactory(StringUtils.format("doubleSumResult%s", i), StringUtils.format("doubleSumResult%s", i)));
    }
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
                                  .dataSource("xxx")
                                  .granularity(Granularities.ALL)
                                  .intervals(ImmutableList.of(Intervals.of("2000/2030")))
                                  .aggregators(queryAggregatorFactories)
                                  .build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(
        new TimeseriesQueryQueryToolChest(),
        new TimeseriesQueryEngine(),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER
    );
    final QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
        factory.createRunner(incrementalIndexSegment),
        factory.getToolchest()
    );
    List<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
    Result<TimeseriesResultValue> result = Iterables.getOnlyElement(results);
    boolean isRollup = index.isRollup();
    Assert.assertEquals(rows * (isRollup ? 1 : 2), result.getValue().getLongMetric("rows").intValue());
    for (int i = 0; i < dimensionCount; ++i) {
        Assert.assertEquals("Failed long sum on dimension " + i, 2 * rows, result.getValue().getLongMetric("sumResult" + i).intValue());
        Assert.assertEquals("Failed double sum on dimension " + i, 2 * rows, result.getValue().getDoubleMetric("doubleSumResult" + i).intValue());
    }
}
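The getLongRow helper is not shown on this page. Below is a minimal sketch of what it plausibly looks like, inferred from the assertions above: after ingesting the data twice, each sumResult<i> equals 2 * rows, so every row must contribute exactly 1 to each dimension's sum. The use of MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow) and the exact field layout are assumptions, not code quoted from the Druid repository.

// Hypothetical reconstruction of getLongRow (assumed; not shown above).
// Builds one row with dimensions Dim_0 .. Dim_{dimensionCount-1}, each holding
// the constant long 1, so every ingested row adds 1 to each per-dimension sum.
private static MapBasedInputRow getLongRow(long timestamp, int dimensionCount) {
    Map<String, Object> event = new HashMap<>();
    for (int i = 0; i < dimensionCount; i++) {
        event.put(StringUtils.format("Dim_%s", i), 1L);
    }
    return new MapBasedInputRow(timestamp, new ArrayList<>(event.keySet()), event);
}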

Example 12 with Segment

Use of org.apache.druid.segment.Segment in project druid by druid-io.

From class SegmentManagerTest, method assertResult:

// TODO remove when the bug in IntelliJ is fixed.
@SuppressWarnings("RedundantThrows")
private void assertResult(List<DataSegment> expectedExistingSegments) throws SegmentLoadingException {
    final Map<String, Long> expectedDataSourceSizes = expectedExistingSegments
        .stream()
        .collect(Collectors.toMap(DataSegment::getDataSource, DataSegment::getSize, Long::sum));
    final Map<String, Long> expectedDataSourceCounts = expectedExistingSegments
        .stream()
        .collect(Collectors.toMap(DataSegment::getDataSource, segment -> 1L, Long::sum));
    final Set<String> expectedDataSourceNames = expectedExistingSegments
        .stream()
        .map(DataSegment::getDataSource)
        .collect(Collectors.toSet());
    final Map<String, VersionedIntervalTimeline<String, ReferenceCountingSegment>> expectedTimelines = new HashMap<>();
    for (DataSegment segment : expectedExistingSegments) {
        final VersionedIntervalTimeline<String, ReferenceCountingSegment> expectedTimeline = expectedTimelines.computeIfAbsent(
            segment.getDataSource(),
            k -> new VersionedIntervalTimeline<>(Ordering.natural())
        );
        expectedTimeline.add(
            segment.getInterval(),
            segment.getVersion(),
            segment.getShardSpec().createChunk(
                ReferenceCountingSegment.wrapSegment(
                    SEGMENT_LOADER.getSegment(segment, false, SegmentLazyLoadFailCallback.NOOP),
                    segment.getShardSpec()
                )
            )
        );
    }
    Assert.assertEquals(expectedDataSourceNames, segmentManager.getDataSourceNames());
    Assert.assertEquals(expectedDataSourceCounts, segmentManager.getDataSourceCounts());
    Assert.assertEquals(expectedDataSourceSizes, segmentManager.getDataSourceSizes());
    final Map<String, DataSourceState> dataSources = segmentManager.getDataSources();
    Assert.assertEquals(expectedTimelines.size(), dataSources.size());
    dataSources.forEach((sourceName, dataSourceState) -> {
        Assert.assertEquals(expectedDataSourceCounts.get(sourceName).longValue(), dataSourceState.getNumSegments());
        Assert.assertEquals(expectedDataSourceSizes.get(sourceName).longValue(), dataSourceState.getTotalSegmentSize());
        Assert.assertEquals(expectedTimelines.get(sourceName).getAllTimelineEntries(), dataSourceState.getTimeline().getAllTimelineEntries());
    });
}
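If the timeline bookkeeping above is unfamiliar, here is a minimal sketch of the VersionedIntervalTimeline add/lookup cycle that assertResult ultimately compares. It assumes a DataSegment named segment is in scope; the lookup interval is illustrative only.

// Illustrative sketch of VersionedIntervalTimeline usage. Entries are keyed by
// (interval, version); lookup() returns the visible chunks for a time range,
// preferring higher versions where intervals overlap.
VersionedIntervalTimeline<String, DataSegment> timeline =
    new VersionedIntervalTimeline<>(Ordering.natural());
timeline.add(
    segment.getInterval(),
    segment.getVersion(),
    segment.getShardSpec().createChunk(segment)
);
List<TimelineObjectHolder<String, DataSegment>> visible =
    timeline.lookup(Intervals.of("2019-01-01/2019-01-02"));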

Example 13 with Segment

Use of org.apache.druid.segment.Segment in project druid by druid-io.

From class SegmentManagerThreadSafetyTest, method testLoadSameSegment:

@Test(timeout = 6000L)
public void testLoadSameSegment() throws IOException, ExecutionException, InterruptedException {
    final DataSegment segment = createSegment("2019-01-01/2019-01-02");
    final List<Future> futures = IntStream
        .range(0, 16)
        .mapToObj(i -> exec.submit(() -> segmentManager.loadSegment(segment, false, SegmentLazyLoadFailCallback.NOOP)))
        .collect(Collectors.toList());
    for (Future future : futures) {
        future.get();
    }
    Assert.assertEquals(1, segmentPuller.numFileLoaded.size());
    Assert.assertEquals(1, segmentPuller.numFileLoaded.values().iterator().next().intValue());
    Assert.assertEquals(0, segmentCacheManager.getSegmentLocks().size());
}
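Neither exec nor createSegment is shown on this page. A plausible sketch of these fixtures follows; every name and value here is an assumption made for illustration, and the real helper presumably also writes actual segment files to local storage, which this sketch omits.

// Hypothetical test fixtures (assumed; not shown above).
ExecutorService exec = Execs.multiThreaded(16, "segment-load-test-%d");

DataSegment createSegment(String interval) {
    return DataSegment.builder()
                      .dataSource("test")
                      .interval(Intervals.of(interval))
                      .version("v1")
                      .shardSpec(new NumberedShardSpec(0, 0))
                      .size(10)
                      .build();
}

With that in mind, the test reads off easily: 16 threads race to load the same segment, and the assertions verify that the underlying file was pulled exactly once and that no per-segment locks leak.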

Example 14 with Segment

Use of org.apache.druid.segment.Segment in project druid by druid-io.

From class SegmentManagerThreadSafetyTest, method testLoadMultipleSegments:

@Test(timeout = 6000L)
public void testLoadMultipleSegments() throws IOException, ExecutionException, InterruptedException {
    final List<DataSegment> segments = new ArrayList<>(88);
    for (int i = 0; i < 11; i++) {
        for (int j = 0; j < 8; j++) {
            segments.add(createSegment(StringUtils.format("2019-%02d-01/2019-%02d-01", i + 1, i + 2)));
        }
    }
    final List<Future> futures = IntStream.range(0, 16).mapToObj(i -> exec.submit(() -> {
        for (DataSegment segment : segments) {
            try {
                segmentManager.loadSegment(segment, false, SegmentLazyLoadFailCallback.NOOP);
            } catch (SegmentLoadingException e) {
                throw new RuntimeException(e);
            }
        }
    })).collect(Collectors.toList());
    for (Future future : futures) {
        future.get();
    }
    Assert.assertEquals(11, segmentPuller.numFileLoaded.size());
    Assert.assertEquals(1, segmentPuller.numFileLoaded.values().iterator().next().intValue());
    Assert.assertEquals(0, segmentCacheManager.getSegmentLocks().size());
}
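The interval arithmetic in the loop above is easy to misread, so here is a tiny standalone check using the same StringUtils.format call (illustrative only):

// i runs 0..10, so this prints "2019-01-01/2019-02-01" through
// "2019-11-01/2019-12-01": 11 distinct month-long intervals, 8 segments each,
// 88 segments in total, matching the assertion that exactly 11 files are pulled.
for (int i = 0; i < 11; i++) {
    System.out.println(StringUtils.format("2019-%02d-01/2019-%02d-01", i + 1, i + 2));
}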

Aggregations

Segment (org.apache.druid.segment.Segment): 14 usages
ArrayList (java.util.ArrayList): 9 usages
Interval (org.joda.time.Interval): 9 usages
Test (org.junit.Test): 9 usages
DataSegment (org.apache.druid.timeline.DataSegment): 8 usages
File (java.io.File): 5 usages
List (java.util.List): 5 usages
Map (java.util.Map): 5 usages
IncrementalIndexSegment (org.apache.druid.segment.IncrementalIndexSegment): 5 usages
ReferenceCountingSegment (org.apache.druid.segment.ReferenceCountingSegment): 5 usages
StorageAdapter (org.apache.druid.segment.StorageAdapter): 5 usages
ImmutableMap (com.google.common.collect.ImmutableMap): 4 usages
IOException (java.io.IOException): 4 usages
HashMap (java.util.HashMap): 4 usages
ExecutionException (java.util.concurrent.ExecutionException): 4 usages
ExecutorService (java.util.concurrent.ExecutorService): 4 usages
Future (java.util.concurrent.Future): 4 usages
Collectors (java.util.stream.Collectors): 4 usages
Intervals (org.apache.druid.java.util.common.Intervals): 4 usages
CountAggregatorFactory (org.apache.druid.query.aggregation.CountAggregatorFactory): 4 usages