
Example 16 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IncrementalIndexTest, method testSingleThreadedIndexingAndQuery:

@Test
public void testSingleThreadedIndexingAndQuery() throws Exception {
    final int dimensionCount = 5;
    final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>();
    ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        ingestAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i)));
        ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i)));
    }
    final IncrementalIndex index = closer.closeLater(indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[ingestAggregatorFactories.size()])));
    final long timestamp = System.currentTimeMillis();
    final int rows = 50;
    // ingest the same data twice so that some rollup merging happens
    for (int i = 0; i < rows; i++) {
        index.add(getLongRow(timestamp + i, i, dimensionCount));
    }
    for (int i = 0; i < rows; i++) {
        index.add(getLongRow(timestamp + i, i, dimensionCount));
    }
    // run a timeseries query on the index and verify the results
    final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>();
    queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
        queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
    }
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(new Interval("2000/2030"))).aggregators(queryAggregatorFactories).build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
    List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, new HashMap<String, Object>()), new LinkedList<Result<TimeseriesResultValue>>());
    Result<TimeseriesResultValue> result = Iterables.getOnlyElement(results);
    boolean isRollup = index.isRollup();
    Assert.assertEquals(rows * (isRollup ? 1 : 2), result.getValue().getLongMetric("rows").intValue());
    for (int i = 0; i < dimensionCount; ++i) {
        Assert.assertEquals(String.format("Failed long sum on dimension %d", i), 2 * rows, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
        Assert.assertEquals(String.format("Failed double sum on dimension %d", i), 2 * rows, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
    }
}
Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) TimeseriesQueryQueryToolChest(io.druid.query.timeseries.TimeseriesQueryQueryToolChest) Segment(io.druid.segment.Segment) Result(io.druid.query.Result) TimeseriesQueryEngine(io.druid.query.timeseries.TimeseriesQueryEngine) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) FilteredAggregatorFactory(io.druid.query.aggregation.FilteredAggregatorFactory) TimeseriesQueryRunnerFactory(io.druid.query.timeseries.TimeseriesQueryRunnerFactory) QueryRunnerFactory(io.druid.query.QueryRunnerFactory) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) Interval(org.joda.time.Interval) Test(org.junit.Test)
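A note on the getLongRow helper, which this example calls but the page does not show. A minimal sketch of a compatible implementation, inferred from the assertions: each Dim_0 .. Dim_4 column must carry the long value 1 so that ingesting the 50 rows twice makes both the long and double sums come out to 2 * rows. The rowID parameter is kept only for signature parity here; the actual helper in IncrementalIndexTest may differ.

import com.google.common.collect.ImmutableMap;
import io.druid.data.input.MapBasedInputRow;
import java.util.ArrayList;
import java.util.List;

private static MapBasedInputRow getLongRow(long timestamp, int rowID, int dimensionCount) {
    // One input row whose dimension names match the ingest aggregators above
    // (Dim_0 .. Dim_{dimensionCount-1}), each mapped to the long value 1.
    List<String> dimensionList = new ArrayList<>(dimensionCount);
    ImmutableMap.Builder<String, Object> builder = ImmutableMap.builder();
    for (int i = 0; i < dimensionCount; i++) {
        String dimName = String.format("Dim_%d", i);
        dimensionList.add(dimName);
        builder.put(dimName, 1L);
    }
    return new MapBasedInputRow(timestamp, dimensionList, builder.build());
}

Because both ingest passes produce identical timestamp and dimension values for a given i, a rollup-enabled index merges them into one aggregated row each, which is why the rows metric is asserted as rows * (isRollup ? 1 : 2).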

Example 17 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IncrementalIndexTest, method testConcurrentAddRead:

@Test(timeout = 60_000L)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException {
    final int dimensionCount = 5;
    final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>(dimensionCount + 1);
    ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        ingestAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i)));
        ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i)));
    }
    final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(dimensionCount + 1);
    queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
        queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
    }
    final IncrementalIndex index = closer.closeLater(indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[ingestAggregatorFactories.size()])));
    final int concurrentThreads = 2;
    final int elementsPerThread = 10_000;
    final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("index-executor-%d").setPriority(Thread.MIN_PRIORITY).build()));
    final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("query-executor-%d").build()));
    final long timestamp = System.currentTimeMillis();
    final Interval queryInterval = new Interval("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
    final List<ListenableFuture<?>> indexFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
    final List<ListenableFuture<?>> queryFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final AtomicInteger currentlyRunning = new AtomicInteger(0);
    final AtomicInteger concurrentlyRan = new AtomicInteger(0);
    final AtomicInteger someoneRan = new AtomicInteger(0);
    final CountDownLatch startLatch = new CountDownLatch(1);
    final CountDownLatch readyLatch = new CountDownLatch(concurrentThreads * 2);
    final AtomicInteger queriesAccumulated = new AtomicInteger(0);
    for (int j = 0; j < concurrentThreads; j++) {
        indexFutures.add(indexExecutor.submit(new Runnable() {

            @Override
            public void run() {
                readyLatch.countDown();
                try {
                    startLatch.await();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw Throwables.propagate(e);
                }
                currentlyRunning.incrementAndGet();
                try {
                    for (int i = 0; i < elementsPerThread; i++) {
                        index.add(getLongRow(timestamp + i, i, dimensionCount));
                        someoneRan.incrementAndGet();
                    }
                } catch (IndexSizeExceededException e) {
                    throw Throwables.propagate(e);
                }
                currentlyRunning.decrementAndGet();
            }
        }));
        final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
        queryFutures.add(queryExecutor.submit(new Runnable() {

            @Override
            public void run() {
                readyLatch.countDown();
                try {
                    startLatch.await();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw Throwables.propagate(e);
                }
                while (concurrentlyRan.get() == 0) {
                    QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
                    Map<String, Object> context = new HashMap<String, Object>();
                    Sequence<Result<TimeseriesResultValue>> sequence = runner.run(query, context);
                    for (Double result : sequence.accumulate(new Double[0], new Accumulator<Double[], Result<TimeseriesResultValue>>() {

                        @Override
                        public Double[] accumulate(Double[] accumulated, Result<TimeseriesResultValue> in) {
                            if (currentlyRunning.get() > 0) {
                                concurrentlyRan.incrementAndGet();
                            }
                            queriesAccumulated.incrementAndGet();
                            return Lists.asList(in.getValue().getDoubleMetric("doubleSumResult0"), accumulated).toArray(new Double[accumulated.length + 1]);
                        }
                    })) {
                        final Integer maxValueExpected = someoneRan.get() + concurrentThreads;
                        if (maxValueExpected > 0) {
                            // Eventually consistent, but should be somewhere in that range
                            // Actual result is validated after all writes are guaranteed done.
                            Assert.assertTrue(String.format("%d >= %g >= 0 violated", maxValueExpected, result), result >= 0 && result <= maxValueExpected);
                        }
                    }
                }
            }
        }));
    }
    readyLatch.await();
    startLatch.countDown();
    List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
    allFutures.addAll(queryFutures);
    allFutures.addAll(indexFutures);
    Futures.allAsList(allFutures).get();
    Assert.assertTrue("Queries ran too fast", queriesAccumualted.get() > 0);
    Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get() > 0);
    queryExecutor.shutdown();
    indexExecutor.shutdown();
    QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
    Map<String, Object> context = new HashMap<String, Object>();
    List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, context), new LinkedList<Result<TimeseriesResultValue>>());
    boolean isRollup = index.isRollup();
    for (Result<TimeseriesResultValue> result : results) {
        Assert.assertEquals(elementsPerThread * (isRollup ? 1 : concurrentThreads), result.getValue().getLongMetric("rows").intValue());
        for (int i = 0; i < dimensionCount; ++i) {
            Assert.assertEquals(String.format("Failed long sum on dimension %d", i), elementsPerThread * concurrentThreads, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
            Assert.assertEquals(String.format("Failed double sum on dimension %d", i), elementsPerThread * concurrentThreads, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
        }
    }
}
Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) TimeseriesQueryQueryToolChest(io.druid.query.timeseries.TimeseriesQueryQueryToolChest) Segment(io.druid.segment.Segment) Result(io.druid.query.Result) TimeseriesQueryEngine(io.druid.query.timeseries.TimeseriesQueryEngine) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) FilteredAggregatorFactory(io.druid.query.aggregation.FilteredAggregatorFactory) CountDownLatch(java.util.concurrent.CountDownLatch) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) TimeseriesQueryRunnerFactory(io.druid.query.timeseries.TimeseriesQueryRunnerFactory) QueryRunnerFactory(io.druid.query.QueryRunnerFactory) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Interval(org.joda.time.Interval) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) Test(org.junit.Test)
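Worth isolating from the query plumbing above is the coordination idiom: every worker counts down a ready latch, then parks on a start latch, so the writers and readers all begin their contended work at (nearly) the same instant. A self-contained sketch of that two-latch start barrier follows; class and variable names are illustrative, not from the Druid source.

import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

public class StartBarrierSketch {
    public static void main(String[] args) throws InterruptedException {
        final int threads = 4;
        // Each worker signals readiness, then blocks until the main thread releases all of them at once.
        final CountDownLatch readyLatch = new CountDownLatch(threads);
        final CountDownLatch startLatch = new CountDownLatch(1);
        ExecutorService pool = Executors.newFixedThreadPool(threads);
        for (int i = 0; i < threads; i++) {
            pool.submit(new Runnable() {
                @Override
                public void run() {
                    readyLatch.countDown();
                    try {
                        startLatch.await();
                    } catch (InterruptedException e) {
                        Thread.currentThread().interrupt();
                        return;
                    }
                    // ... contended work goes here ...
                }
            });
        }
        readyLatch.await();     // wait until every worker is parked on startLatch
        startLatch.countDown(); // release them all simultaneously
        pool.shutdown();
    }
}

The test then uses the currentlyRunning and concurrentlyRan counters to verify that reads actually overlapped writes, and retries until they did.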

Example 18 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class IncrementalIndexTest, method testCaseSensitivity:

@Test
public void testCaseSensitivity() throws Exception {
    long timestamp = System.currentTimeMillis();
    IncrementalIndex index = closer.closeLater(indexCreator.createIndex(defaultAggregatorFactories));
    populateIndex(timestamp, index);
    Assert.assertEquals(Arrays.asList("dim1", "dim2"), index.getDimensionNames());
    Assert.assertEquals(2, index.size());
    final Iterator<Row> rows = index.iterator();
    Row row = rows.next();
    Assert.assertEquals(timestamp, row.getTimestampFromEpoch());
    Assert.assertEquals(Arrays.asList("1"), row.getDimension("dim1"));
    Assert.assertEquals(Arrays.asList("2"), row.getDimension("dim2"));
    row = rows.next();
    Assert.assertEquals(timestamp, row.getTimestampFromEpoch());
    Assert.assertEquals(Arrays.asList("3"), row.getDimension("dim1"));
    Assert.assertEquals(Arrays.asList("4"), row.getDimension("dim2"));
}
Also used : OffheapIncrementalIndex(io.druid.segment.incremental.OffheapIncrementalIndex) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) Test(org.junit.Test)
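populateIndex is a helper defined elsewhere in the test class. A minimal sketch consistent with the assertions above, adding two rows at the same timestamp; whether the real helper feeds mixed-case dimension names to exercise case handling is not visible from this page, so this shape is an assumption.

import com.google.common.collect.ImmutableMap;
import io.druid.data.input.MapBasedInputRow;
import io.druid.segment.incremental.IndexSizeExceededException;
import java.util.Arrays;

private static void populateIndex(long timestamp, IncrementalIndex index) throws IndexSizeExceededException {
    // Two rows at the same timestamp; the assertions read both back via "dim1"/"dim2".
    index.add(new MapBasedInputRow(
        timestamp,
        Arrays.asList("dim1", "dim2"),
        ImmutableMap.<String, Object>of("dim1", "1", "dim2", "2")
    ));
    index.add(new MapBasedInputRow(
        timestamp,
        Arrays.asList("dim1", "dim2"),
        ImmutableMap.<String, Object>of("dim1", "3", "dim2", "4")
    ));
}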

Example 19 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class TestIndex, method getMMappedTestIndex:

public static QueryableIndex getMMappedTestIndex() {
    synchronized (log) {
        if (mmappedIndex != null) {
            return mmappedIndex;
        }
    }
    IncrementalIndex incrementalIndex = getIncrementalTestIndex();
    mmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
    return mmappedIndex;
}
Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex)
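Note that the synchronized block above guards only the read: the assignment to mmappedIndex happens outside the lock, so two callers can race past the null check and both pay the persist-and-mmap cost. For test fixtures that is usually harmless, but if it matters, a sketch of a fully locked variant (reusing only the identifiers from the method above, not the actual Druid code) would be:

public static QueryableIndex getMMappedTestIndex() {
    synchronized (log) {
        if (mmappedIndex == null) {
            // Build and publish under the lock so concurrent callers
            // cannot both run the expensive persist-and-load step.
            IncrementalIndex incrementalIndex = getIncrementalTestIndex();
            mmappedIndex = persistRealtimeAndLoadMMapped(incrementalIndex);
        }
        return mmappedIndex;
    }
}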

Example 20 with IncrementalIndex

Use of io.druid.segment.incremental.IncrementalIndex in project druid by druid-io.

From class TestIndex, method makeRealtimeIndex:

public static IncrementalIndex makeRealtimeIndex(final CharSource source, boolean rollup) {
    final IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder().withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis()).withTimestampSpec(new TimestampSpec("ds", "auto", null)).withQueryGranularity(Granularities.NONE).withDimensionsSpec(DIMENSIONS_SPEC).withVirtualColumns(VIRTUAL_COLUMNS).withMetrics(METRIC_AGGS).withRollup(rollup).build();
    final IncrementalIndex retVal = new OnheapIncrementalIndex(schema, true, 10000);
    try {
        return loadIncrementalIndex(retVal, source);
    } catch (Exception e) {
        if (rollup) {
            realtimeIndex = null;
        } else {
            noRollupRealtimeIndex = null;
        }
        throw Throwables.propagate(e);
    }
}
Also used : IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) TimestampSpec(io.druid.data.input.impl.TimestampSpec) IncrementalIndexSchema(io.druid.segment.incremental.IncrementalIndexSchema) DateTime(org.joda.time.DateTime) IOException(java.io.IOException)
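The schema builder chain is the core of this example. A stripped-down sketch of the same construction path, using a minimal schema and a hand-built row instead of the TestIndex sample data; the granularity, boolean flag, and row cap are illustrative, and the imports are assumed to match the examples above.

// Assumes the same imports as the surrounding examples
// (Granularities, DateTime, ImmutableMap, Arrays, MapBasedInputRow, aggregator factories).
public static IncrementalIndex makeTinyIndex() throws IndexSizeExceededException {
    IncrementalIndexSchema schema = new IncrementalIndexSchema.Builder()
        .withMinTimestamp(new DateTime("2011-01-12T00:00:00.000Z").getMillis())
        .withQueryGranularity(Granularities.NONE)
        .withMetrics(new AggregatorFactory[]{new CountAggregatorFactory("rows")})
        .withRollup(true)
        .build();
    // Flags mirror the OnheapIncrementalIndex call in makeRealtimeIndex above; 1000 is an illustrative row cap.
    IncrementalIndex index = new OnheapIncrementalIndex(schema, true, 1000);
    index.add(new MapBasedInputRow(
        new DateTime("2011-01-12T01:00:00.000Z").getMillis(),
        Arrays.asList("dim1"),
        ImmutableMap.<String, Object>of("dim1", "value")
    ));
    return index;
}

From here the index can be wrapped in an IncrementalIndexSegment and queried exactly as in Examples 16 and 17.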

Aggregations

IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 89 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 81 usages
File (java.io.File): 49 usages
Test (org.junit.Test): 37 usages
MapBasedInputRow (io.druid.data.input.MapBasedInputRow): 33 usages
IncrementalIndexTest (io.druid.segment.data.IncrementalIndexTest): 26 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 25 usages
DateTime (org.joda.time.DateTime): 21 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 16 usages
QueryableIndex (io.druid.segment.QueryableIndex): 16 usages
DimensionsSpec (io.druid.data.input.impl.DimensionsSpec): 13 usages
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment): 13 usages
IndexSpec (io.druid.segment.IndexSpec): 13 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 12 usages
IncrementalIndexAdapter (io.druid.segment.incremental.IncrementalIndexAdapter): 12 usages
Interval (org.joda.time.Interval): 11 usages
InputRow (io.druid.data.input.InputRow): 10 usages
IOException (java.io.IOException): 10 usages
BenchmarkDataGenerator (io.druid.benchmark.datagen.BenchmarkDataGenerator): 8 usages
HyperUniquesSerde (io.druid.query.aggregation.hyperloglog.HyperUniquesSerde): 8 usages