Example 6 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From the class MultiSegmentSelectQueryTest, method setup:

@BeforeClass
public static void setup() throws IOException {
    CharSource v_0112 = CharSource.wrap(StringUtils.join(V_0112, "\n"));
    CharSource v_0113 = CharSource.wrap(StringUtils.join(V_0113, "\n"));
    CharSource v_override = CharSource.wrap(StringUtils.join(V_OVERRIDE, "\n"));
    IncrementalIndex index0 = TestIndex.loadIncrementalIndex(newIndex("2011-01-12T00:00:00.000Z"), v_0112);
    IncrementalIndex index1 = TestIndex.loadIncrementalIndex(newIndex("2011-01-13T00:00:00.000Z"), v_0113);
    IncrementalIndex index2 = TestIndex.loadIncrementalIndex(newIndex("2011-01-12T04:00:00.000Z"), v_override);
    segment0 = new IncrementalIndexSegment(index0, makeIdentifier(index0, "v1"));
    segment1 = new IncrementalIndexSegment(index1, makeIdentifier(index1, "v1"));
    segment_override = new IncrementalIndexSegment(index2, makeIdentifier(index2, "v2"));
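    // The "v2" segment overlaps the "v1" segments in time; the version-aware timeline below lets it overshadow them.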
    VersionedIntervalTimeline<String, Segment> timeline = new VersionedIntervalTimeline<>(StringComparators.LEXICOGRAPHIC);
    timeline.add(index0.getInterval(), "v1", new SingleElementPartitionChunk<>(segment0));
    timeline.add(index1.getInterval(), "v1", new SingleElementPartitionChunk<>(segment1));
    timeline.add(index2.getInterval(), "v2", new SingleElementPartitionChunk<>(segment_override));
    segmentIdentifiers = Lists.newArrayList();
    for (TimelineObjectHolder<String, ?> holder : timeline.lookup(new Interval("2011-01-12/2011-01-14"))) {
        segmentIdentifiers.add(makeIdentifier(holder.getInterval(), holder.getVersion()));
    }
    runner = QueryRunnerTestHelper.makeFilteringQueryRunner(timeline, factory);
}
Also used : CharSource(com.google.common.io.CharSource) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) DataSegment(io.druid.timeline.DataSegment) Segment(io.druid.segment.Segment) SingleElementPartitionChunk(io.druid.timeline.partition.SingleElementPartitionChunk) Interval(org.joda.time.Interval) BeforeClass(org.junit.BeforeClass)
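
What this setup exercises is version overshadowing: the "v2" segment covers part of the "v1" interval, and a timeline lookup returns only the highest version for each span. A minimal sketch of that behavior, using String payloads and made-up intervals in place of real segments (illustrative only, not taken from the test):

VersionedIntervalTimeline<String, String> timeline = new VersionedIntervalTimeline<>(StringComparators.LEXICOGRAPHIC);
timeline.add(new Interval("2011-01-12/2011-01-13"), "v1", new SingleElementPartitionChunk<>("day-v1"));
timeline.add(new Interval("2011-01-12T04:00/2011-01-12T05:00"), "v2", new SingleElementPartitionChunk<>("override-v2"));
// The lookup splits the day into three holders: v1 up to 04:00, v2 from 04:00 to 05:00, then v1 again.
for (TimelineObjectHolder<String, String> holder : timeline.lookup(new Interval("2011-01-12/2011-01-13"))) {
    System.out.println(holder.getInterval() + " -> " + holder.getVersion());
}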

Example 7 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From the class TimeBoundaryQueryRunnerTest, method getCustomRunner:

private QueryRunner getCustomRunner() throws IOException {
    CharSource v_0112 = CharSource.wrap(StringUtils.join(V_0112, "\n"));
    CharSource v_0113 = CharSource.wrap(StringUtils.join(V_0113, "\n"));
    IncrementalIndex index0 = TestIndex.loadIncrementalIndex(newIndex("2011-01-12T00:00:00.000Z"), v_0112);
    IncrementalIndex index1 = TestIndex.loadIncrementalIndex(newIndex("2011-01-14T00:00:00.000Z"), v_0113);
    segment0 = new IncrementalIndexSegment(index0, makeIdentifier(index0, "v1"));
    segment1 = new IncrementalIndexSegment(index1, makeIdentifier(index1, "v1"));
    VersionedIntervalTimeline<String, Segment> timeline = new VersionedIntervalTimeline<>(StringComparators.LEXICOGRAPHIC);
    timeline.add(index0.getInterval(), "v1", new SingleElementPartitionChunk<>(segment0));
    timeline.add(index1.getInterval(), "v1", new SingleElementPartitionChunk<>(segment1));
    segmentIdentifiers = Lists.newArrayList();
    for (TimelineObjectHolder<String, ?> holder : timeline.lookup(new Interval("2011-01-12/2011-01-17"))) {
        segmentIdentifiers.add(makeIdentifier(holder.getInterval(), holder.getVersion()));
    }
    return QueryRunnerTestHelper.makeFilteringQueryRunner(timeline, factory);
}
Also used : CharSource(com.google.common.io.CharSource) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) VersionedIntervalTimeline(io.druid.timeline.VersionedIntervalTimeline) DataSegment(io.druid.timeline.DataSegment) Segment(io.druid.segment.Segment) SingleElementPartitionChunk(io.druid.timeline.partition.SingleElementPartitionChunk) Interval(org.joda.time.Interval)

Example 8 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From the class RealtimePlumber, method persistAndMerge:

// Submits persist-n-merge task for a Sink to the mergeExecutor
private void persistAndMerge(final long truncatedTime, final Sink sink) {
    final String threadName = String.format("%s-%s-persist-n-merge", schema.getDataSource(), new DateTime(truncatedTime));
    mergeExecutor.execute(new ThreadRenamingRunnable(threadName) {

        final Interval interval = sink.getInterval();

        Stopwatch mergeStopwatch = null;

        @Override
        public void doRun() {
            try {
                // Bail out if this sink has been abandoned by a previously-executed task.
                if (sinks.get(truncatedTime) != sink) {
                    log.info("Sink[%s] was abandoned, bailing out of persist-n-merge.", sink);
                    return;
                }
                // Use a file to indicate that pushing has completed.
                final File persistDir = computePersistDir(schema, interval);
                final File mergedTarget = new File(persistDir, "merged");
                final File isPushedMarker = new File(persistDir, "isPushedMarker");
                if (!isPushedMarker.exists()) {
                    removeSegment(sink, mergedTarget);
                    if (mergedTarget.exists()) {
                        log.wtf("Merged target[%s] exists?!", mergedTarget);
                        return;
                    }
                } else {
                    log.info("Already pushed sink[%s]", sink);
                    return;
                }
                /*
                 * Note: if the plumber crashes after persisting a subset of hydrants, data might be duplicated, as
                 * these hydrants will be re-read but the older commitMetadata will be used. Fixing this probably
                 * requires structural changes to the plumber.
                 */
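                // Persist any hydrants that have not been swapped to disk yet, so the merge below reads only persisted indexes.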
                for (FireHydrant hydrant : sink) {
                    synchronized (hydrant) {
                        if (!hydrant.hasSwapped()) {
                            log.info("Hydrant[%s] hasn't swapped yet, swapping. Sink[%s]", hydrant, sink);
                            final int rowCount = persistHydrant(hydrant, schema, interval, null);
                            metrics.incrementRowOutputCount(rowCount);
                        }
                    }
                }
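                // Track merge cost in both per-thread CPU time and wall-clock time; both are emitted as metrics below.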
                final long mergeThreadCpuTime = VMUtils.safeGetThreadCpuTime();
                mergeStopwatch = Stopwatch.createStarted();
                List<QueryableIndex> indexes = Lists.newArrayList();
                for (FireHydrant fireHydrant : sink) {
                    Segment segment = fireHydrant.getSegment();
                    final QueryableIndex queryableIndex = segment.asQueryableIndex();
                    log.info("Adding hydrant[%s]", fireHydrant);
                    indexes.add(queryableIndex);
                }
                final File mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, config.getIndexSpec());
                // emit merge metrics before publishing segment
                metrics.incrementMergeCpuTime(VMUtils.safeGetThreadCpuTime() - mergeThreadCpuTime);
                metrics.incrementMergeTimeMillis(mergeStopwatch.elapsed(TimeUnit.MILLISECONDS));
                QueryableIndex index = indexIO.loadIndex(mergedFile);
                log.info("Pushing [%s] to deep storage", sink.getSegment().getIdentifier());
                DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
                log.info("Inserting [%s] to the metadata store", sink.getSegment().getIdentifier());
                segmentPublisher.publishSegment(segment);
                if (!isPushedMarker.createNewFile()) {
                    log.makeAlert("Failed to create marker file for [%s]", schema.getDataSource())
                       .addData("interval", sink.getInterval())
                       .addData("partitionNum", segment.getShardSpec().getPartitionNum())
                       .addData("marker", isPushedMarker)
                       .emit();
                }
            } catch (Exception e) {
                metrics.incrementFailedHandoffs();
                log.makeAlert(e, "Failed to persist merged index[%s]", schema.getDataSource()).addData("interval", interval).emit();
                if (shuttingDown) {
                    // We're trying to shut down, and this segment failed to push. Let's just get rid of it.
                    // This call will also delete possibly-partially-written files, so we don't need to do it explicitly.
                    cleanShutdown = false;
                    abandonSegment(truncatedTime, sink);
                }
            } finally {
                if (mergeStopwatch != null) {
                    mergeStopwatch.stop();
                }
            }
        }
    });
    handoffNotifier.registerSegmentHandoffCallback(new SegmentDescriptor(sink.getInterval(), sink.getVersion(), config.getShardSpec().getPartitionNum()), mergeExecutor, new Runnable() {

        @Override
        public void run() {
            abandonSegment(sink.getInterval().getStartMillis(), sink);
            metrics.incrementHandOffCount();
        }
    });
}
Also used : Stopwatch(com.google.common.base.Stopwatch) DataSegment(io.druid.timeline.DataSegment) DateTime(org.joda.time.DateTime) QueryableIndexSegment(io.druid.segment.QueryableIndexSegment) Segment(io.druid.segment.Segment) IndexSizeExceededException(io.druid.segment.incremental.IndexSizeExceededException) IOException(java.io.IOException) QueryableIndex(io.druid.segment.QueryableIndex) SegmentDescriptor(io.druid.query.SegmentDescriptor) ThreadRenamingRunnable(io.druid.common.guava.ThreadRenamingRunnable) List(java.util.List) FireHydrant(io.druid.segment.realtime.FireHydrant) File(java.io.File) Interval(org.joda.time.Interval)
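
The marker-file check above is what makes persist-n-merge safe to re-run: work happens only when isPushedMarker is absent, and the marker is created only after the segment has been published. A minimal, self-contained sketch of that pattern (the doPushAndPublish method and class name here are hypothetical, not part of Druid):

import java.io.File;
import java.io.IOException;

public class PushOnceTask {
    // Run the push at most once per directory: skip if the marker exists,
    // and write the marker only after the push has succeeded.
    public void pushOnce(File persistDir) throws IOException {
        final File marker = new File(persistDir, "isPushedMarker");
        if (marker.exists()) {
            return; // a previous run already completed the push
        }
        doPushAndPublish(persistDir); // hypothetical stand-in for merge + push + publish
        if (!marker.createNewFile()) {
            throw new IOException("Failed to create marker file " + marker);
        }
    }

    private void doPushAndPublish(File persistDir) {
        // placeholder for the merge, deep-storage push, and metadata publish
    }
}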

Example 9 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From the class OnheapIncrementalIndexBenchmark, method testConcurrentAddRead:

@Ignore
@Test
@BenchmarkOptions(callgc = true, clock = Clock.REAL_TIME, warmupRounds = 10, benchmarkRounds = 20)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException {
    final int taskCount = 30;
    final int concurrentThreads = 3;
    final int elementsPerThread = 1 << 15;
    final OnheapIncrementalIndex incrementalIndex = this.incrementalIndex.getConstructor(Long.TYPE, Granularity.class, AggregatorFactory[].class, Integer.TYPE).newInstance(0, Granularities.NONE, factories, elementsPerThread * taskCount);
    final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(dimensionCount + 1);
    queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
        queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
    }
    final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("index-executor-%d").setPriority(Thread.MIN_PRIORITY).build()));
    final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("query-executor-%d").build()));
    final long timestamp = System.currentTimeMillis();
    final Interval queryInterval = new Interval("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
    final List<ListenableFuture<?>> indexFutures = new LinkedList<>();
    final List<ListenableFuture<?>> queryFutures = new LinkedList<>();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(incrementalIndex, null);
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final AtomicInteger currentlyRunning = new AtomicInteger(0);
    final AtomicBoolean concurrentlyRan = new AtomicBoolean(false);
    final AtomicBoolean someoneRan = new AtomicBoolean(false);
    for (int j = 0; j < taskCount; j++) {
        indexFutures.add(indexExecutor.submit(new Runnable() {

            @Override
            public void run() {
                currentlyRunning.incrementAndGet();
                try {
                    for (int i = 0; i < elementsPerThread; i++) {
                        incrementalIndex.add(getLongRow(timestamp + i, 1, dimensionCount));
                    }
                } catch (IndexSizeExceededException e) {
                    throw Throwables.propagate(e);
                }
                currentlyRunning.decrementAndGet();
                someoneRan.set(true);
            }
        }));
        queryFutures.add(queryExecutor.submit(new Runnable() {

            @Override
            public void run() {
                QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
                TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
                Map<String, Object> context = new HashMap<String, Object>();
                for (Result<TimeseriesResultValue> result : Sequences.toList(runner.run(query, context), new LinkedList<Result<TimeseriesResultValue>>())) {
                    if (someoneRan.get()) {
                        Assert.assertTrue(result.getValue().getDoubleMetric("doubleSumResult0") > 0);
                    }
                }
                if (currentlyRunning.get() > 0) {
                    concurrentlyRan.set(true);
                }
            }
        }));
    }
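    // Wait for every writer and reader task to finish before the final verification query.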
    List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
    allFutures.addAll(queryFutures);
    allFutures.addAll(indexFutures);
    Futures.allAsList(allFutures).get();
    //Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get());
    queryExecutor.shutdown();
    indexExecutor.shutdown();
    QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
    Map<String, Object> context = new HashMap<String, Object>();
    List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, context), new LinkedList<Result<TimeseriesResultValue>>());
    final int expectedVal = elementsPerThread * taskCount;
    for (Result<TimeseriesResultValue> result : results) {
        Assert.assertEquals(elementsPerThread, result.getValue().getLongMetric("rows").intValue());
        for (int i = 0; i < dimensionCount; ++i) {
            Assert.assertEquals(String.format("Failed long sum on dimension %d", i), expectedVal, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
            Assert.assertEquals(String.format("Failed double sum on dimension %d", i), expectedVal, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
        }
    }
}
Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) LongSumAggregatorFactory(io.druid.query.aggregation.LongSumAggregatorFactory) TimeseriesQueryQueryToolChest(io.druid.query.timeseries.TimeseriesQueryQueryToolChest) Granularity(io.druid.java.util.common.granularity.Granularity) Segment(io.druid.segment.Segment) Result(io.druid.query.Result) TimeseriesQueryEngine(io.druid.query.timeseries.TimeseriesQueryEngine) ThreadFactoryBuilder(com.google.common.util.concurrent.ThreadFactoryBuilder) DoubleSumAggregatorFactory(io.druid.query.aggregation.DoubleSumAggregatorFactory) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory) LinkedList(java.util.LinkedList) AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) TimeseriesQueryRunnerFactory(io.druid.query.timeseries.TimeseriesQueryRunnerFactory) QueryRunnerFactory(io.druid.query.QueryRunnerFactory) FinalizeResultsQueryRunner(io.druid.query.FinalizeResultsQueryRunner) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) ListenableFuture(com.google.common.util.concurrent.ListenableFuture) ListeningExecutorService(com.google.common.util.concurrent.ListeningExecutorService) Interval(org.joda.time.Interval) Ignore(org.junit.Ignore) Test(org.junit.Test) BenchmarkOptions(com.carrotsearch.junitbenchmarks.BenchmarkOptions)
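
The two atomics are how the benchmark verifies that readers actually overlapped with writers: currentlyRunning counts in-flight writer tasks, and a reader that observes a positive count sets concurrentlyRan. A stripped-down sketch of that coordination pattern, with sleeps standing in for the Druid index and query work (all names illustrative):

import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;

public class OverlapCheck {
    public static void main(String[] args) throws InterruptedException {
        final AtomicInteger currentlyRunning = new AtomicInteger(0);
        final AtomicBoolean concurrentlyRan = new AtomicBoolean(false);
        ExecutorService pool = Executors.newFixedThreadPool(4);
        for (int i = 0; i < 8; i++) {
            pool.submit(() -> {
                // writer: bracket the work with the in-flight counter
                currentlyRunning.incrementAndGet();
                try {
                    Thread.sleep(10); // stand-in for index writes
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                } finally {
                    currentlyRunning.decrementAndGet();
                }
            });
            pool.submit(() -> {
                // reader: record whether a writer was in flight while it ran
                if (currentlyRunning.get() > 0) {
                    concurrentlyRan.set(true);
                }
            });
        }
        pool.shutdown();
        pool.awaitTermination(1, TimeUnit.MINUTES);
        System.out.println("observed overlap: " + concurrentlyRan.get());
    }
}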

Example 10 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From the class DistinctCountGroupByQueryTest, method testGroupByWithDistinctCountAgg:

@Test
public void testGroupByWithDistinctCountAgg() throws Exception {
    final GroupByQueryConfig config = new GroupByQueryConfig();
    config.setMaxIntermediateRows(10000);
    final GroupByQueryRunnerFactory factory = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
    IncrementalIndex index = new OnheapIncrementalIndex(0, Granularities.SECOND, new AggregatorFactory[] { new CountAggregatorFactory("cnt") }, 1000);
    String visitor_id = "visitor_id";
    String client_type = "client_type";
    long timestamp = System.currentTimeMillis();
    index.add(new MapBasedInputRow(timestamp, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "0", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp + 1, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "1", client_type, "iphone")));
    index.add(new MapBasedInputRow(timestamp + 2, Lists.newArrayList(visitor_id, client_type), ImmutableMap.<String, Object>of(visitor_id, "2", client_type, "android")));
    GroupByQuery query = new GroupByQuery.Builder()
        .setDataSource(QueryRunnerTestHelper.dataSource)
        .setGranularity(QueryRunnerTestHelper.allGran)
        .setDimensions(Arrays.<DimensionSpec>asList(new DefaultDimensionSpec(client_type, client_type)))
        .setInterval(QueryRunnerTestHelper.fullOnInterval)
        .setLimitSpec(new DefaultLimitSpec(Lists.newArrayList(new OrderByColumnSpec(client_type, OrderByColumnSpec.Direction.DESCENDING)), 10))
        .setAggregatorSpecs(Lists.newArrayList(QueryRunnerTestHelper.rowsCount, new DistinctCountAggregatorFactory("UV", visitor_id, null)))
        .build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, factory.createRunner(incrementalIndexSegment), query);
    List<Row> expectedResults = Arrays.asList(
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", client_type, "iphone", "UV", 2L, "rows", 2L),
        GroupByQueryRunnerTestHelper.createExpectedRow("1970-01-01T00:00:00.000Z", client_type, "android", "UV", 1L, "rows", 1L));
    TestHelper.assertExpectedObjects(expectedResults, results, "distinct-count");
}
Also used : GroupByQueryRunnerFactory(io.druid.query.groupby.GroupByQueryRunnerFactory) DefaultLimitSpec(io.druid.query.groupby.orderby.DefaultLimitSpec) GroupByQueryConfig(io.druid.query.groupby.GroupByQueryConfig) IncrementalIndex(io.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(io.druid.segment.incremental.OnheapIncrementalIndex) IncrementalIndexSegment(io.druid.segment.IncrementalIndexSegment) DefaultDimensionSpec(io.druid.query.dimension.DefaultDimensionSpec) Segment(io.druid.segment.Segment) OrderByColumnSpec(io.druid.query.groupby.orderby.OrderByColumnSpec) GroupByQuery(io.druid.query.groupby.GroupByQuery) CountAggregatorFactory(io.druid.query.aggregation.CountAggregatorFactory) MapBasedInputRow(io.druid.data.input.MapBasedInputRow) Row(io.druid.data.input.Row) Test(org.junit.Test) GroupByQueryRunnerTest(io.druid.query.groupby.GroupByQueryRunnerTest)
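
The expected results follow directly from the three input rows: client_type "iphone" sees two distinct visitor_ids ("0" and "1") and "android" sees one ("2"). A throwaway sketch that derives those expectations outside Druid (plain Java, illustrative only):

import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.stream.Collectors;

public class ExpectedDistinctCounts {
    public static void main(String[] args) {
        // client_type -> visitor_id pairs mirroring the three rows added to the index above
        List<String[]> rows = Arrays.asList(
            new String[] { "iphone", "0" },
            new String[] { "iphone", "1" },
            new String[] { "android", "2" });
        // Group by client_type and count distinct visitor_ids, as the "UV" aggregator does
        Map<String, Long> uv = rows.stream().collect(Collectors.groupingBy(
            r -> r[0],
            Collectors.mapping(r -> r[1],
                Collectors.collectingAndThen(Collectors.toSet(), s -> (long) s.size()))));
        System.out.println(uv); // prints {android=1, iphone=2} (map order may vary)
    }
}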

Aggregations

Segment (io.druid.segment.Segment): 11 usages
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment): 7 usages
Interval (org.joda.time.Interval): 7 usages
DataSegment (io.druid.timeline.DataSegment): 6 usages
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 5 usages
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 5 usages
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 4 usages
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 4 usages
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 4 usages
Test (org.junit.Test): 4 usages
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner): 3 usages
QueryRunnerFactory (io.druid.query.QueryRunnerFactory): 3 usages
Result (io.druid.query.Result): 3 usages
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3 usages
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 3 usages
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 3 usages
CharSource (com.google.common.io.CharSource): 2 usages
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 2 usages
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 2 usages
ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 2 usages