use of io.druid.segment.Segment in project druid by druid-io.
the class SpecificSegmentsQuerySegmentWalker method add.
public SpecificSegmentsQuerySegmentWalker add(final DataSegment descriptor, final QueryableIndex index) {
final Segment segment = new QueryableIndexSegment(descriptor.getIdentifier(), index);
if (!timelines.containsKey(descriptor.getDataSource())) {
timelines.put(descriptor.getDataSource(), new VersionedIntervalTimeline<String, Segment>(Ordering.natural()));
}
final VersionedIntervalTimeline<String, Segment> timeline = timelines.get(descriptor.getDataSource());
timeline.add(descriptor.getInterval(), descriptor.getVersion(), descriptor.getShardSpec().createChunk(segment));
segments.add(descriptor);
closeables.add(index);
return this;
}
use of io.druid.segment.Segment in project druid by druid-io.
the class AppenderatorImpl method mergeAndPush.
/**
* Merge segment, push to deep storage. Should only be used on segments that have been fully persisted. Must only
* be run in the single-threaded pushExecutor.
*
* @param identifier sink identifier
* @param sink sink to push
*
* @return segment descriptor, or null if the sink is no longer valid
*/
private DataSegment mergeAndPush(final SegmentIdentifier identifier, final Sink sink) {
// Bail out if this sink is null or otherwise not what we expect.
if (sinks.get(identifier) != sink) {
log.warn("Sink for segment[%s] no longer valid, bailing out of mergeAndPush.", identifier);
return null;
}
// Use a descriptor file to indicate that pushing has completed.
final File persistDir = computePersistDir(identifier);
final File mergedTarget = new File(persistDir, "merged");
final File descriptorFile = computeDescriptorFile(identifier);
// Sanity checks
for (FireHydrant hydrant : sink) {
if (sink.isWritable()) {
throw new ISE("WTF?! Expected sink to be no longer writable before mergeAndPush. Segment[%s].", identifier);
}
synchronized (hydrant) {
if (!hydrant.hasSwapped()) {
throw new ISE("WTF?! Expected sink to be fully persisted before mergeAndPush. Segment[%s].", identifier);
}
}
}
try {
if (descriptorFile.exists()) {
// Already pushed.
log.info("Segment[%s] already pushed.", identifier);
return objectMapper.readValue(descriptorFile, DataSegment.class);
}
log.info("Pushing merged index for segment[%s].", identifier);
removeDirectory(mergedTarget);
if (mergedTarget.exists()) {
throw new ISE("Merged target[%s] exists after removing?!", mergedTarget);
}
List<QueryableIndex> indexes = Lists.newArrayList();
for (FireHydrant fireHydrant : sink) {
Segment segment = fireHydrant.getSegment();
final QueryableIndex queryableIndex = segment.asQueryableIndex();
log.info("Adding hydrant[%s]", fireHydrant);
indexes.add(queryableIndex);
}
final File mergedFile;
mergedFile = indexMerger.mergeQueryableIndex(indexes, schema.getGranularitySpec().isRollup(), schema.getAggregators(), mergedTarget, tuningConfig.getIndexSpec());
QueryableIndex index = indexIO.loadIndex(mergedFile);
DataSegment segment = dataSegmentPusher.push(mergedFile, sink.getSegment().withDimensions(Lists.newArrayList(index.getAvailableDimensions())));
objectMapper.writeValue(descriptorFile, segment);
log.info("Pushed merged index for segment[%s], descriptor is: %s", identifier, segment);
return segment;
} catch (Exception e) {
metrics.incrementFailedHandoffs();
log.warn(e, "Failed to push merged index for segment[%s].", identifier);
throw Throwables.propagate(e);
}
}
use of io.druid.segment.Segment in project druid by druid-io.
the class ServerManager method loadSegment.
/**
* Load a single segment.
*
* @param segment segment to load
*
* @return true if the segment was newly loaded, false if it was already loaded
*
* @throws SegmentLoadingException if the segment cannot be loaded
*/
public boolean loadSegment(final DataSegment segment) throws SegmentLoadingException {
final Segment adapter;
try {
adapter = segmentLoader.getSegment(segment);
} catch (SegmentLoadingException e) {
try {
segmentLoader.cleanup(segment);
} catch (SegmentLoadingException e1) {
// ignore
}
throw e;
}
if (adapter == null) {
throw new SegmentLoadingException("Null adapter from loadSpec[%s]", segment.getLoadSpec());
}
synchronized (lock) {
String dataSource = segment.getDataSource();
VersionedIntervalTimeline<String, ReferenceCountingSegment> loadedIntervals = dataSources.get(dataSource);
if (loadedIntervals == null) {
loadedIntervals = new VersionedIntervalTimeline<>(Ordering.natural());
dataSources.put(dataSource, loadedIntervals);
}
PartitionHolder<ReferenceCountingSegment> entry = loadedIntervals.findEntry(segment.getInterval(), segment.getVersion());
if ((entry != null) && (entry.getChunk(segment.getShardSpec().getPartitionNum()) != null)) {
log.warn("Told to load a adapter for a segment[%s] that already exists", segment.getIdentifier());
return false;
}
loadedIntervals.add(segment.getInterval(), segment.getVersion(), segment.getShardSpec().createChunk(new ReferenceCountingSegment(adapter)));
synchronized (dataSourceSizes) {
dataSourceSizes.add(dataSource, segment.getSize());
}
synchronized (dataSourceCounts) {
dataSourceCounts.add(dataSource, 1L);
}
return true;
}
}
use of io.druid.segment.Segment in project druid by druid-io.
the class IncrementalIndexTest method testSingleThreadedIndexingAndQuery.
@Test
public void testSingleThreadedIndexingAndQuery() throws Exception {
final int dimensionCount = 5;
final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>();
ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
for (int i = 0; i < dimensionCount; ++i) {
ingestAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i)));
ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i)));
}
final IncrementalIndex index = closer.closeLater(indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[ingestAggregatorFactories.size()])));
final long timestamp = System.currentTimeMillis();
final int rows = 50;
//ingesting same data twice to have some merging happening
for (int i = 0; i < rows; i++) {
index.add(getLongRow(timestamp + i, i, dimensionCount));
}
for (int i = 0; i < rows; i++) {
index.add(getLongRow(timestamp + i, i, dimensionCount));
}
//run a timeseries query on the index and verify results
final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>();
queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
for (int i = 0; i < dimensionCount; ++i) {
queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
}
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(new Interval("2000/2030"))).aggregators(queryAggregatorFactories).build();
final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
final QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, new HashMap<String, Object>()), new LinkedList<Result<TimeseriesResultValue>>());
Result<TimeseriesResultValue> result = Iterables.getOnlyElement(results);
boolean isRollup = index.isRollup();
Assert.assertEquals(rows * (isRollup ? 1 : 2), result.getValue().getLongMetric("rows").intValue());
for (int i = 0; i < dimensionCount; ++i) {
Assert.assertEquals(String.format("Failed long sum on dimension %d", i), 2 * rows, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
Assert.assertEquals(String.format("Failed double sum on dimension %d", i), 2 * rows, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
}
}
use of io.druid.segment.Segment in project druid by druid-io.
the class IncrementalIndexTest method testConcurrentAddRead.
@Test(timeout = 60_000L)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException {
final int dimensionCount = 5;
final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>(dimensionCount + 1);
ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
for (int i = 0; i < dimensionCount; ++i) {
ingestAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("Dim_%s", i)));
ingestAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("Dim_%s", i)));
}
final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(dimensionCount + 1);
queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
for (int i = 0; i < dimensionCount; ++i) {
queryAggregatorFactories.add(new LongSumAggregatorFactory(String.format("sumResult%s", i), String.format("sumResult%s", i)));
queryAggregatorFactories.add(new DoubleSumAggregatorFactory(String.format("doubleSumResult%s", i), String.format("doubleSumResult%s", i)));
}
final IncrementalIndex index = closer.closeLater(indexCreator.createIndex(ingestAggregatorFactories.toArray(new AggregatorFactory[dimensionCount])));
final int concurrentThreads = 2;
final int elementsPerThread = 10_000;
final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("index-executor-%d").setPriority(Thread.MIN_PRIORITY).build()));
final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(Executors.newFixedThreadPool(concurrentThreads, new ThreadFactoryBuilder().setDaemon(false).setNameFormat("query-executor-%d").build()));
final long timestamp = System.currentTimeMillis();
final Interval queryInterval = new Interval("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
final List<ListenableFuture<?>> indexFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
final List<ListenableFuture<?>> queryFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryRunnerTestHelper.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryRunnerTestHelper.NOOP_QUERYWATCHER);
final AtomicInteger currentlyRunning = new AtomicInteger(0);
final AtomicInteger concurrentlyRan = new AtomicInteger(0);
final AtomicInteger someoneRan = new AtomicInteger(0);
final CountDownLatch startLatch = new CountDownLatch(1);
final CountDownLatch readyLatch = new CountDownLatch(concurrentThreads * 2);
final AtomicInteger queriesAccumualted = new AtomicInteger(0);
for (int j = 0; j < concurrentThreads; j++) {
indexFutures.add(indexExecutor.submit(new Runnable() {
@Override
public void run() {
readyLatch.countDown();
try {
startLatch.await();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw Throwables.propagate(e);
}
currentlyRunning.incrementAndGet();
try {
for (int i = 0; i < elementsPerThread; i++) {
index.add(getLongRow(timestamp + i, i, dimensionCount));
someoneRan.incrementAndGet();
}
} catch (IndexSizeExceededException e) {
throw Throwables.propagate(e);
}
currentlyRunning.decrementAndGet();
}
}));
final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
queryFutures.add(queryExecutor.submit(new Runnable() {
@Override
public void run() {
readyLatch.countDown();
try {
startLatch.await();
} catch (InterruptedException e) {
Thread.currentThread().interrupt();
throw Throwables.propagate(e);
}
while (concurrentlyRan.get() == 0) {
QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
Map<String, Object> context = new HashMap<String, Object>();
Sequence<Result<TimeseriesResultValue>> sequence = runner.run(query, context);
for (Double result : sequence.accumulate(new Double[0], new Accumulator<Double[], Result<TimeseriesResultValue>>() {
@Override
public Double[] accumulate(Double[] accumulated, Result<TimeseriesResultValue> in) {
if (currentlyRunning.get() > 0) {
concurrentlyRan.incrementAndGet();
}
queriesAccumualted.incrementAndGet();
return Lists.asList(in.getValue().getDoubleMetric("doubleSumResult0"), accumulated).toArray(new Double[accumulated.length + 1]);
}
})) {
final Integer maxValueExpected = someoneRan.get() + concurrentThreads;
if (maxValueExpected > 0) {
// Eventually consistent, but should be somewhere in that range
// Actual result is validated after all writes are guaranteed done.
Assert.assertTrue(String.format("%d >= %g >= 0 violated", maxValueExpected, result), result >= 0 && result <= maxValueExpected);
}
}
}
}
}));
}
readyLatch.await();
startLatch.countDown();
List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
allFutures.addAll(queryFutures);
allFutures.addAll(indexFutures);
Futures.allAsList(allFutures).get();
Assert.assertTrue("Queries ran too fast", queriesAccumualted.get() > 0);
Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get() > 0);
queryExecutor.shutdown();
indexExecutor.shutdown();
QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(factory.createRunner(incrementalIndexSegment), factory.getToolchest());
TimeseriesQuery query = Druids.newTimeseriesQueryBuilder().dataSource("xxx").granularity(Granularities.ALL).intervals(ImmutableList.of(queryInterval)).aggregators(queryAggregatorFactories).build();
Map<String, Object> context = new HashMap<String, Object>();
List<Result<TimeseriesResultValue>> results = Sequences.toList(runner.run(query, context), new LinkedList<Result<TimeseriesResultValue>>());
boolean isRollup = index.isRollup();
for (Result<TimeseriesResultValue> result : results) {
Assert.assertEquals(elementsPerThread * (isRollup ? 1 : concurrentThreads), result.getValue().getLongMetric("rows").intValue());
for (int i = 0; i < dimensionCount; ++i) {
Assert.assertEquals(String.format("Failed long sum on dimension %d", i), elementsPerThread * concurrentThreads, result.getValue().getLongMetric(String.format("sumResult%s", i)).intValue());
Assert.assertEquals(String.format("Failed double sum on dimension %d", i), elementsPerThread * concurrentThreads, result.getValue().getDoubleMetric(String.format("doubleSumResult%s", i)).intValue());
}
}
}
Aggregations