Example 1 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From class SpecificSegmentsQuerySegmentWalker, method add.

public SpecificSegmentsQuerySegmentWalker add(final DataSegment descriptor, final QueryableIndex index) {
    final Segment segment = new QueryableIndexSegment(descriptor.getIdentifier(), index);
    if (!timelines.containsKey(descriptor.getDataSource())) {
        timelines.put(descriptor.getDataSource(), new VersionedIntervalTimeline<String, Segment>(Ordering.natural()));
    }
    final VersionedIntervalTimeline<String, Segment> timeline = timelines.get(descriptor.getDataSource());
    timeline.add(descriptor.getInterval(), descriptor.getVersion(), descriptor.getShardSpec().createChunk(segment));
    segments.add(descriptor);
    closeables.add(index);
    return this;
}
Also used: QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), DataSegment (io.druid.timeline.DataSegment), Segment (io.druid.segment.Segment)
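
Because add returns this, segment registrations can be chained fluently. A minimal usage sketch, assuming a constructor that takes a query runner conglomerate (as in Druid's SQL tests); the data source name, interval, version, shard spec, and index variable are hypothetical placeholders:

// Hypothetical usage: register a segment with the walker. All builder values
// and the QueryableIndex (index) are illustrative, not from the code above.
SpecificSegmentsQuerySegmentWalker walker = new SpecificSegmentsQuerySegmentWalker(conglomerate)
    .add(
        DataSegment.builder()
                   .dataSource("foo")
                   .interval(new Interval("2000/2001"))
                   .version("1")
                   .shardSpec(new LinearShardSpec(0))
                   .build(),
        index
    );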

Example 2 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From class AppenderatorImpl, method mergeAndPush.

/**
   * Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
   * be run in the single-threaded pushExecutor.
   *
   * @param identifier sink identifier
   * @param sink       sink to push
   *
   * @return segment descriptor, or null if the sink is no longer valid
   */
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink) {
    // Bail out if this sink is null or otherwise not what we expect.
    if (sinks.get(identifier) != sink) {
        log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
        return null;
    }
    // Use a descriptor file to indicate that pushing has completed.
    final File persistDir = computePersistDir(identifier);
    final File mergedTarget = new File(persistDir, "merged");
    final File descriptorFile = computeDescriptorFile(identifier);
    // Sanity checks: the sink must no longer be writable, and every hydrant
    // must already have been persisted (swapped to an on-disk segment).
    if (sink.isWritable()) {
        throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
    }
    for (FireHydrant hydrant : sink) {
        synchronized (hydrant) {
            if (!hydrant.hasSwapped()) {
                throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
            }
        }
    }
    try {
        if (descriptorFile.exists()) {
            // Already pushed.
            log.info("Segment[%s] already pushed.", identifier);
            return objectMapper.readValue(descriptorFile, DataSegment.class);
        }
        log.info("Pushing merged index for segment[%s].", identifier);
        removeDirectory(mergedTarget);
        if (mergedTarget.exists()) {
            throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
        }
        List<QueryableIndex> indexes = Lists.newArrayList();
        for (FireHydrant fireHydrant : sink) {
            Segment segment = fireHydrant.getSegment();
            final QueryableIndex queryableIndex = segment.asQueryableIndex();
            log.info("Adding hydrant[%s]", fireHydrant);
            indexes.add(queryableIndex);
        }
        final File mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, tuningConfig.getIndexSpec());
        QueryableIndex index = indexIO.loadIndex(mergedFile);
        DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
        objectMapper.writeValue(descriptorFile, segment);
        log.info("Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
        return segment;
    } catch (Exception e) {
        metrics.incrementFailedHandoffs();
        log.warn(e, "Failed to push merged index for segment[%s].", identifier);
        throw Throwables.propagate(e);
    }
}
Also used: QueryableIndex (io.druid.segment.QueryableIndex), ISE (io.druid.java.util.common.ISE), FireHydrant (io.druid.segment.realtime.FireHydrant), File (java.io.File), DataSegment (io.druid.timeline.DataSegment), QueryableIndexSegment (io.druid.segment.QueryableIndexSegment), Segment (io.druid.segment.Segment), IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException), IOException (java.io.IOException), ExecutionException (java.util.concurrent.ExecutionException)
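
The descriptor file doubles as an idempotency marker: it is written only after a successful push, so a retried mergeAndPush short-circuits to the previously written descriptor instead of pushing again. A stripped-down sketch of that pattern in isolation (pushOnce and its Callable parameter are hypothetical helpers, not part of AppenderatorImpl):

// Sketch of the write-marker-last idempotency pattern used by mergeAndPush.
// objectMapper is a Jackson ObjectMapper; push performs the actual upload.
DataSegment pushOnce(File descriptorFile, ObjectMapper objectMapper, Callable<DataSegment> push) throws Exception {
    if (descriptorFile.exists()) {
        // A previous run completed; reuse its result rather than re-pushing.
        return objectMapper.readValue(descriptorFile, DataSegment.class);
    }
    final DataSegment segment = push.call();
    // The descriptor is written last, so its existence implies a finished push.
    objectMapper.writeValue(descriptorFile, segment);
    return segment;
}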

Example 3 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From class ServerManager, method loadSegment.

/**
   * Load a single segment.
   *
   * @param segment segment to load
   *
   * @return true if the segment was newly loaded, false if it was already loaded
   *
   * @throws SegmentLoadingException if the segment cannot be loaded
   */
public boolean loadSegment(final DataSegment segment) throws SegmentLoadingException {
    final Segment adapter;
    try {
        adapter = segmentLoader.getSegment(segment);
    } catch (SegmentLoadingException e) {
        try {
            segmentLoader.cleanup(segment);
        } catch (SegmentLoadingException e1) {
            // ignore: cleanup is best-effort; the original exception is rethrown
        }
        throw e;
    }
    if (adapter == null) {
        throw new SegmentLoadingException("Null adapter from loadSpec[%s]", segment.getLoadSpec());
    }
    synchronized (lock) {
        String dataSource = segment.getDataSource();
        VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSources.get(dataSource);
        if (loadedIntervals == null) {
            loadedIntervals = new VersionedIntervalTimeline<>(Ordering.natural());
            dataSources.put(dataSource, loadedIntervals);
        }
        PartitionHolder<ReferenceCountingSegment> entry = loadedIntervals.findEntry(segment.getInterval(), segment.getVersion());
        if ((entry != null) && (entry.getChunk(segment.getShardSpec().getPartitionNum()) != null)) {
            log.warn("Told to load a adapter for a segment[%s] that already exists", segment.getIdentifier());
            return false;
        }
        loadedIntervals.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(new ReferenceCountingSegment(adapter)));
        synchronized (dataSourceSizes) {
            dataSourceSizes.add(dataSource, segment.getSize());
        }
        synchronized (dataSourceCounts) {
            dataSourceCounts.add(dataSource, 1L);
        }
        return true;
    }
}
Also used: ReferenceCountingSegment (io.druid.segment.ReferenceCountingSegment), SegmentLoadingException (io.druid.segment.loading.SegmentLoadingException), DataSegment (io.druid.timeline.DataSegment), Segment (io.druid.segment.Segment)
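
A hypothetical caller might look like the sketch below: a false return means the segment was already being served, and on failure loadSegment has already invoked segmentLoader.cleanup, so the caller only needs to handle the exception (serverManager, segment, and log are assumed to be in scope):

try {
    final boolean newlyLoaded = serverManager.loadSegment(segment);
    if (!newlyLoaded) {
        // Another load beat us to it; the segment is already being served.
        log.info("Segment[%s] already loaded.", segment.getIdentifier());
    }
} catch (SegmentLoadingException e) {
    // Partial on-disk state was already cleaned up inside loadSegment.
    log.error(e, "Failed to load segment[%s].", segment.getIdentifier());
}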

Example 4 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From class IncrementalIndexTest, method testSingleThreadedIndexingAndQuery.

@Test
public void testSingleThreadedIndexingAndQuery() throws Exception {
    final int dimensionCount = 5;
    final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>();
    ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        ingestAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i)));
        ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i)));
    }
    final IncrementalIndex index = closer.closeLater(indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[ingestAggregatorFactories.size()])));
    final long timestamp = System.currentTimeMillis();
    final int rows = 50;
    // Ingest the same data twice so that some rollup (merging) happens.
    for (int i = 0; i < rows; i++) {
        index.add(getLongRow(timestamp + i, i, dimensionCount));
    }
    for (int i = 0; i < rows; i++) {
        index.add(getLongRow(timestamp + i, i, dimensionCount));
    }
    // Run a timeseries query on the index and verify the results.
    final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>();
    queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
        queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
    }
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(new Interval("2000/2030"))).aggregators(queryAggregatorFactories).build();
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
    List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, new HashMap<String, Object>()), new LinkedList<Result<TimeseriesResultValue>>());
    Result<TimeseriesResultValue> result = Iterables.getOnlyElement(results);
    boolean isRollup = index.isRollup();
    Assert.assertEquals(rows * (isRollup ? 1 : 2), result.getValue().getLongMetric("rows").intValue());
    for (int i = 0; i < dimensionCount; ++i) {
        Assert.assertEquals(String.format("Failed long sum on dimension %d", i), 2 * rows, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
        Assert.assertEquals(String.format("Failed double sum on dimension %d", i), 2 * rows, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
    }
}
Also used: TimeseriesResultValue (io.druid.query.timeseries.TimeseriesResultValue), IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory), TimeseriesQueryQueryToolChest (io.druid.query.timeseries.TimeseriesQueryQueryToolChest), Segment (io.druid.segment.Segment), Result (io.druid.query.Result), TimeseriesQueryEngine (io.druid.query.timeseries.TimeseriesQueryEngine), DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory), TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery), OffheapIncrementalIndex (io.druid.segment.incremental.OffheapIncrementalIndex), IncrementalIndex (io.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory), TimeseriesQueryRunnerFactory (io.druid.query.timeseries.TimeseriesQueryRunnerFactory), QueryRunnerFactory (io.druid.query.QueryRunnerFactory), FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner), Interval (org.joda.time.Interval), Test (org.junit.Test)
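
The test depends on a getLongRow helper that is not shown above. A plausible sketch, assuming the Dim_<i> column naming that the ingest aggregators reference; note that each row must contribute exactly 1 per dimension for the 2 * rows assertions to hold, so the rowID argument goes unused here (the actual helper in IncrementalIndexTest may differ):

// Sketch: build a MapBasedInputRow with dimensionCount long-valued columns,
// each set to 1 so that the sum aggregators effectively count rows.
private static MapBasedInputRow getLongRow(long timestamp, int rowID, int dimensionCount) {
    final Map<String, Object> event = new HashMap<>();
    final List<String> dimensionList = new ArrayList<>(dimensionCount);
    for (int i = 0; i < dimensionCount; i++) {
        final String dimName = String.format("Dim_%d", i);
        dimensionList.add(dimName);
        event.put(dimName, 1L);
    }
    return new MapBasedInputRow(timestamp, dimensionList, event);
}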

Example 5 with Segment

Use of io.druid.segment.Segment in project druid by druid-io.

From class IncrementalIndexTest, method testConcurrentAddRead.

@Test(timeout = 60_000L)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException {
    final int dimensionCount = 5;
    final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>(dimensionCount + 1);
    ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        ingestAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i)));
        ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i)));
    }
    final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(dimensionCount + 1);
    queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
    for (int i = 0; i < dimensionCount; ++i) {
        queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
        queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
    }
    final IncrementalIndex index = closer.closeLater(indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[ingestAggregatorFactories.size()])));
    final int concurrentThreads = 2;
    final int elementsPerThread = 10_000;
    final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("index-executor-%d").setPriority(Thread.MIN_PRIORITY).build()));
    final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("query-executor-%d").build()));
    final long timestamp = System.currentTimeMillis();
    final Interval queryInterval = new Interval("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
    final List<ListenableFuture<?>> indexFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
    final List<ListenableFuture<?>> queryFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
    final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
    final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
    final AtomicInteger currentlyRunning = new AtomicInteger(0);
    final AtomicInteger concurrentlyRan = new AtomicInteger(0);
    final AtomicInteger someoneRan = new AtomicInteger(0);
    final CountDownLatch startLatch = new CountDownLatch(1);
    final CountDownLatch readyLatch = new CountDownLatch(concurrentThreads * 2);
    final AtomicInteger queriesAccumulated = new AtomicInteger(0);
    for (int j = 0; j < concurrentThreads; j++) {
        indexFutures.add(indexExecutor.submit(new Runnable() {

            @Override
            public void run() {
                readyLatch.countDown();
                try {
                    startLatch.await();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw Throwables.propagate(e);
                }
                currentlyRunning.incrementAndGet();
                try {
                    for (int i = 0; i < elementsPerThread; i++) {
                        index.add(getLongRow(timestamp + i, i, dimensionCount));
                        someoneRan.incrementAndGet();
                    }
                } catch (IndexSizeExceededException e) {
                    throw Throwables.propagate(e);
                }
                currentlyRunning.decrementAndGet();
            }
        }));
        final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
        queryFutures.add(queryExecutor.submit(new Runnable() {

            @Override
            public void run() {
                readyLatch.countDown();
                try {
                    startLatch.await();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw Throwables.propagate(e);
                }
                while (concurrentlyRan.get() == 0) {
                    QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
                    Map<String, Object> context = new HashMap<String, Object>();
                    Sequence<Result<TimeseriesResultValue>> sequence = runner.run(query, context);
                    for (Double result : sequence.accumulate(new Double[0], new Accumulator<Double[], Result<TimeseriesResultValue>>() {

                        @Override
                        public Double[] accumulate(Double[] accumulated, Result<TimeseriesResultValue> in) {
                            if (currentlyRunning.get() > 0) {
                                concurrentlyRan.incrementAndGet();
                            }
                            queriesAccumulated.incrementAndGet();
                            return Lists.asList(in.getValue().getDoubleMetric("doubleSumResult0"), accumulated).toArray(new Double[accumulated.length + 1]);
                        }
                    })) {
                        final Integer maxValueExpected = someoneRan.get() + concurrentThreads;
                        if (maxValueExpected > 0) {
                            // Eventually consistent, but should be somewhere in that range
                            // Actual result is validated after all writes are guaranteed done.
                            Assert.assertTrue(String.format("%d >= %g >= 0 violated", maxValueExpected, result), result >= 0 && result <= maxValueExpected);
                        }
                    }
                }
            }
        }));
    }
    readyLatch.await();
    startLatch.countDown();
    List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
    allFutures.addAll(queryFutures);
    allFutures.addAll(indexFutures);
    Futures.allAsList(allFutures).get();
    Assert.assertTrue("Queries ran too fast", queriesAccumualted.get() > 0);
    Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get() > 0);
    queryExecutor.shutdown();
    indexExecutor.shutdown();
    QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
    Map<String, Object> context = new HashMap<String, Object>();
    List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, context), new LinkedList<Result<TimeseriesResultValue>>());
    boolean isRollup = index.isRollup();
    for (Result<TimeseriesResultValue> result : results) {
        Assert.assertEquals(elementsPerThread * (isRollup ? 1 : concurrentThreads), result.getValue().getLongMetric("rows").intValue());
        for (int i = 0; i < dimensionCount; ++i) {
            Assert.assertEquals(String.format("Failed long sum on dimension %d", i), elementsPerThread * concurrentThreads, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
            Assert.assertEquals(String.format("Failed double sum on dimension %d", i), elementsPerThread * concurrentThreads, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
        }
    }
}
Also used: TimeseriesResultValue (io.druid.query.timeseries.TimeseriesResultValue), IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment), HashMap (java.util.HashMap), ArrayList (java.util.ArrayList), LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory), TimeseriesQueryQueryToolChest (io.druid.query.timeseries.TimeseriesQueryQueryToolChest), Segment (io.druid.segment.Segment), Result (io.druid.query.Result), TimeseriesQueryEngine (io.druid.query.timeseries.TimeseriesQueryEngine), ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder), DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory), TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery), OffheapIncrementalIndex (io.druid.segment.incremental.OffheapIncrementalIndex), IncrementalIndex (io.druid.segment.incremental.IncrementalIndex), OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex), CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory), AggregatorFactory (io.druid.query.aggregation.AggregatorFactory), FilteredAggregatorFactory (io.druid.query.aggregation.FilteredAggregatorFactory), CountDownLatch (java.util.concurrent.CountDownLatch), AtomicInteger (java.util.concurrent.atomic.AtomicInteger), TimeseriesQueryRunnerFactory (io.druid.query.timeseries.TimeseriesQueryRunnerFactory), QueryRunnerFactory (io.druid.query.QueryRunnerFactory), FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner), ListenableFuture (com.google.common.util.concurrent.ListenableFuture), ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService), Interval (org.joda.time.Interval), IndexSizeExceededException (io.druid.segment.incremental.IndexSizeExceededException), Test (org.junit.Test)
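
The query thread above exercises Druid's Sequence/Accumulator fold, which consumes results one at a time without materializing the whole sequence. A stripped-down sketch of the same pattern, counting results instead of collecting doubles (the sequence variable is assumed to be in scope):

// Fold a Sequence<Result<TimeseriesResultValue>> into a running count;
// accumulate is invoked once per element as the sequence is consumed.
final Integer count = sequence.accumulate(
    0,
    new Accumulator<Integer, Result<TimeseriesResultValue>>() {
        @Override
        public Integer accumulate(Integer accumulated, Result<TimeseriesResultValue> in) {
            return accumulated + 1;
        }
    }
);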

Aggregations

Segment (io.druid.segment.Segment): 11
IncrementalIndexSegment (io.druid.segment.IncrementalIndexSegment): 7
Interval (org.joda.time.Interval): 7
DataSegment (io.druid.timeline.DataSegment): 6
IncrementalIndex (io.druid.segment.incremental.IncrementalIndex): 5
OnheapIncrementalIndex (io.druid.segment.incremental.OnheapIncrementalIndex): 5
CountAggregatorFactory (io.druid.query.aggregation.CountAggregatorFactory): 4
TimeseriesQuery (io.druid.query.timeseries.TimeseriesQuery): 4
QueryableIndexSegment (io.druid.segment.QueryableIndexSegment): 4
Test (org.junit.Test): 4
FinalizeResultsQueryRunner (io.druid.query.FinalizeResultsQueryRunner): 3
QueryRunnerFactory (io.druid.query.QueryRunnerFactory): 3
Result (io.druid.query.Result): 3
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 3
DoubleSumAggregatorFactory (io.druid.query.aggregation.DoubleSumAggregatorFactory): 3
LongSumAggregatorFactory (io.druid.query.aggregation.LongSumAggregatorFactory): 3
CharSource (com.google.common.io.CharSource): 2
ListenableFuture (com.google.common.util.concurrent.ListenableFuture): 2
ListeningExecutorService (com.google.common.util.concurrent.ListeningExecutorService): 2
ThreadFactoryBuilder (com.google.common.util.concurrent.ThreadFactoryBuilder): 2