Use of org.apache.druid.segment.Segment in project druid by druid-io.
The class AbstractMultiPhaseParallelIndexingTest, method querySegment.
List<ScanResultValue> querySegment(DataSegment dataSegment, List<String> columns, File tempSegmentDir) {
  Segment segment = loadSegment(dataSegment, tempSegmentDir);
  final QueryRunner<ScanResultValue> runner = SCAN_QUERY_RUNNER_FACTORY.createRunner(segment);
  return runner.run(
      QueryPlus.wrap(
          new ScanQuery(
              new TableDataSource("dataSource"),
              new SpecificSegmentSpec(
                  new SegmentDescriptor(
                      dataSegment.getInterval(),
                      dataSegment.getVersion(),
                      dataSegment.getShardSpec().getPartitionNum()
                  )
              ),
              null, null, 0, 0, 0, null, null, null,
              columns,
              false,
              null
          )
      )
  ).toList();
}
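A minimal usage sketch, not part of the Druid source above: a subclass test could call querySegment after an indexing run to verify what was published. runTestTask, the column names, and temporaryFolder are assumptions for illustration.

// Hypothetical caller (sketch only): scan every published segment and check rows came back.
Set<DataSegment> publishedSegments = runTestTask(timestampSpec, dimensionsSpec, inputFormat);
for (DataSegment published : publishedSegments) {
  List<ScanResultValue> rows = querySegment(published, ImmutableList.of("dim", "val"), temporaryFolder.newFolder());
  Assert.assertFalse("expected scanned rows for " + published.getId(), rows.isEmpty());
}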
Use of org.apache.druid.segment.Segment in project druid by druid-io.
The class OnheapIncrementalIndexBenchmark, method testConcurrentAddRead.
@Ignore
@Test
@BenchmarkOptions(callgc = true, clock = Clock.REAL_TIME, warmupRounds = 10, benchmarkRounds = 20)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException, NoSuchMethodException, IllegalAccessException, InvocationTargetException, InstantiationException {
  final int taskCount = 30;
  final int concurrentThreads = 3;
  final int elementsPerThread = 1 << 15;
  final IncrementalIndex incrementalIndex = this.incrementalIndex
      .getConstructor(IncrementalIndexSchema.class, boolean.class, boolean.class, boolean.class, boolean.class, int.class)
      .newInstance(
          new IncrementalIndexSchema.Builder().withMetrics(factories).build(),
          true,
          true,
          false,
          true,
          elementsPerThread * taskCount
      );
  final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(DIMENSION_COUNT + 1);
  queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
  for (int i = 0; i < DIMENSION_COUNT; ++i) {
    queryAggregatorFactories.add(
        new LongSumAggregatorFactory(StringUtils.format("sumResult%s", i), StringUtils.format("sumResult%s", i))
    );
    queryAggregatorFactories.add(
        new DoubleSumAggregatorFactory(StringUtils.format("doubleSumResult%s", i), StringUtils.format("doubleSumResult%s", i))
    );
  }
  final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          concurrentThreads,
          new ThreadFactoryBuilder()
              .setDaemon(false)
              .setNameFormat("index-executor-%d")
              .setPriority(Thread.MIN_PRIORITY)
              .build()
      )
  );
  final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          concurrentThreads,
          new ThreadFactoryBuilder()
              .setDaemon(false)
              .setNameFormat("query-executor-%d")
              .build()
      )
  );
  final long timestamp = System.currentTimeMillis();
  final Interval queryInterval = Intervals.of("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
  final List<ListenableFuture<?>> indexFutures = new ArrayList<>();
  final List<ListenableFuture<?>> queryFutures = new ArrayList<>();
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(incrementalIndex, null);
  final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(
      new TimeseriesQueryQueryToolChest(),
      new TimeseriesQueryEngine(),
      QueryRunnerTestHelper.NOOP_QUERYWATCHER
  );
  final AtomicInteger currentlyRunning = new AtomicInteger(0);
  final AtomicBoolean concurrentlyRan = new AtomicBoolean(false);
  final AtomicBoolean someoneRan = new AtomicBoolean(false);
  for (int j = 0; j < taskCount; j++) {
    indexFutures.add(indexExecutor.submit(new Runnable() {
      @Override
      public void run() {
        currentlyRunning.incrementAndGet();
        try {
          for (int i = 0; i < elementsPerThread; i++) {
            incrementalIndex.add(getLongRow(timestamp + i, 1, DIMENSION_COUNT));
          }
        } catch (IndexSizeExceededException e) {
          throw new RuntimeException(e);
        }
        currentlyRunning.decrementAndGet();
        someoneRan.set(true);
      }
    }));
    queryFutures.add(queryExecutor.submit(new Runnable() {
      @Override
      public void run() {
        QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
            factory.createRunner(incrementalIndexSegment),
            factory.getToolchest()
        );
        TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
            .dataSource("xxx")
            .granularity(Granularities.ALL)
            .intervals(ImmutableList.of(queryInterval))
            .aggregators(queryAggregatorFactories)
            .build();
        List<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
        for (Result<TimeseriesResultValue> result : results) {
          if (someoneRan.get()) {
            Assert.assertTrue(result.getValue().getDoubleMetric("doubleSumResult0") > 0);
          }
        }
        if (currentlyRunning.get() > 0) {
          concurrentlyRan.set(true);
        }
      }
    }));
  }
  List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
  allFutures.addAll(queryFutures);
  allFutures.addAll(indexFutures);
  Futures.allAsList(allFutures).get();
  // Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get());
  queryExecutor.shutdown();
  indexExecutor.shutdown();
  QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
      factory.createRunner(incrementalIndexSegment),
      factory.getToolchest()
  );
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource("xxx")
      .granularity(Granularities.ALL)
      .intervals(ImmutableList.of(queryInterval))
      .aggregators(queryAggregatorFactories)
      .build();
  List<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
  final int expectedVal = elementsPerThread * taskCount;
  for (Result<TimeseriesResultValue> result : results) {
    Assert.assertEquals(elementsPerThread, result.getValue().getLongMetric("rows").intValue());
    for (int i = 0; i < DIMENSION_COUNT; ++i) {
      Assert.assertEquals(
          StringUtils.format("Failed long sum on dimension %d", i),
          expectedVal,
          result.getValue().getLongMetric(StringUtils.format("sumResult%s", i)).intValue()
      );
      Assert.assertEquals(
          StringUtils.format("Failed double sum on dimension %d", i),
          expectedVal,
          result.getValue().getDoubleMetric(StringUtils.format("doubleSumResult%s", i)).intValue()
      );
    }
  }
}
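The reflective newInstance call above consumes a `factories` array of ingest-time aggregators that this excerpt does not show. A plausible construction, assuming it mirrors the sumResult/doubleSumResult metrics the queries read back; the actual field in OnheapIncrementalIndexBenchmark may differ.

// Sketch of the assumed `factories` field: one row count plus a long and a double sum per dimension.
final List<AggregatorFactory> ingestAggregators = new ArrayList<>();
ingestAggregators.add(new CountAggregatorFactory("rows"));
for (int i = 0; i < DIMENSION_COUNT; i++) {
  ingestAggregators.add(new LongSumAggregatorFactory(StringUtils.format("sumResult%s", i), StringUtils.format("Dim_%s", i)));
  ingestAggregators.add(new DoubleSumAggregatorFactory(StringUtils.format("doubleSumResult%s", i), StringUtils.format("Dim_%s", i)));
}
final AggregatorFactory[] factories = ingestAggregators.toArray(new AggregatorFactory[0]);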
Use of org.apache.druid.segment.Segment in project druid by druid-io.
The class AppenderatorImpl, method persistHydrant.
/**
* Persists the given hydrant and returns the number of rows persisted. Must only be called in the single-threaded
* persistExecutor.
*
* @param indexToPersist hydrant to persist
* @param identifier the segment this hydrant is going to be part of
*
* @return the number of rows persisted
*/
private int persistHydrant(FireHydrant indexToPersist, SegmentIdWithShardSpec identifier) {
  synchronized (indexToPersist) {
    if (indexToPersist.hasSwapped()) {
      log.info("Segment[%s] hydrant[%s] already swapped. Ignoring request to persist.", identifier, indexToPersist);
      return 0;
    }
    log.debug("Segment[%s], persisting Hydrant[%s]", identifier, indexToPersist);
    try {
      final long startTime = System.nanoTime();
      int numRows = indexToPersist.getIndex().size();
      final File persistedFile;
      final File persistDir = createPersistDirIfNeeded(identifier);
      persistedFile = indexMerger.persist(
          indexToPersist.getIndex(),
          identifier.getInterval(),
          new File(persistDir, String.valueOf(indexToPersist.getCount())),
          tuningConfig.getIndexSpecForIntermediatePersists(),
          tuningConfig.getSegmentWriteOutMediumFactory()
      );
      log.info(
          "Flushed in-memory data for segment[%s] spill[%s] to disk in [%,d] ms (%,d rows).",
          indexToPersist.getSegmentId(),
          indexToPersist.getCount(),
          (System.nanoTime() - startTime) / 1000000,
          numRows
      );
      // Map only when this appenderator is being driven by a real time task:
      Segment segmentToSwap = null;
      if (isOpenSegments()) {
        segmentToSwap = new QueryableIndexSegment(indexIO.loadIndex(persistedFile), indexToPersist.getSegmentId());
      } else {
        // remember file path & segment id to rebuild the queryable index for merge:
        persistedHydrantMetadata.put(indexToPersist, new Pair<>(persistedFile, indexToPersist.getSegmentId()));
      }
      indexToPersist.swapSegment(segmentToSwap);
      return numRows;
    } catch (IOException e) {
      log.makeAlert("Incremental persist failed")
         .addData("segment", identifier.toString())
         .addData("dataSource", schema.getDataSource())
         .addData("count", indexToPersist.getCount())
         .emit();
      throw new RuntimeException(e);
    }
  }
}
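In the closed-segments branch above, swapSegment(null) leaves the hydrant without a queryable segment; the saved (file, SegmentId) pair is what allows rebuilding one later. A hedged sketch of that rebuild, written as a hypothetical helper; the actual merge path in AppenderatorImpl differs in detail.

private void rebuildSegmentForMerge(FireHydrant hydrant) throws IOException {
  // Sketch only: reload the persisted index from disk and swap it back into the hydrant.
  Pair<File, SegmentId> metadata = persistedHydrantMetadata.get(hydrant);
  if (metadata != null) {
    QueryableIndex queryableIndex = indexIO.loadIndex(metadata.lhs);
    hydrant.swapSegment(new QueryableIndexSegment(queryableIndex, metadata.rhs));
  }
}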
Use of org.apache.druid.segment.Segment in project druid by druid-io.
The class SegmentLocalCacheLoader, method getSegment.
@Override
public ReferenceCountingSegment getSegment(DataSegment segment, boolean lazy, SegmentLazyLoadFailCallback loadFailed) throws SegmentLoadingException {
  final File segmentFiles = cacheManager.getSegmentFiles(segment);
  File factoryJson = new File(segmentFiles, "factory.json");
  final SegmentizerFactory factory;
  if (factoryJson.exists()) {
    try {
      factory = jsonMapper.readValue(factoryJson, SegmentizerFactory.class);
    } catch (IOException e) {
      throw new SegmentLoadingException(e, "%s", e.getMessage());
    }
  } else {
    factory = new MMappedQueryableSegmentizerFactory(indexIO);
  }
  Segment segmentObject = factory.factorize(segment, segmentFiles, lazy, loadFailed);
  return ReferenceCountingSegment.wrapSegment(segmentObject, segment.getShardSpec());
}
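A brief usage sketch, not from the Druid source: load a cached segment eagerly and hold a reference while reading it. SegmentLazyLoadFailCallback.NOOP and acquireReferences() come from the real Druid API; the loader and dataSegment variables and the body of the try block are illustrative assumptions.

ReferenceCountingSegment segment = loader.getSegment(dataSegment, false, SegmentLazyLoadFailCallback.NOOP);
// acquireReferences() returns Optional<Closeable>; an empty result means the segment was already closed.
try (Closeable reference = segment.acquireReferences().orElseThrow(IllegalStateException::new)) {
  StorageAdapter adapter = segment.asStorageAdapter();
  // ... run cursors or queries against the adapter here ...
}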
Use of org.apache.druid.segment.Segment in project druid by druid-io.
The class IncrementalIndexTest, method testConcurrentAddRead.
@Test(timeout = 60_000L)
public void testConcurrentAddRead() throws InterruptedException, ExecutionException {
  final int dimensionCount = 5;
  final ArrayList<AggregatorFactory> ingestAggregatorFactories = new ArrayList<>(dimensionCount + 1);
  ingestAggregatorFactories.add(new CountAggregatorFactory("rows"));
  for (int i = 0; i < dimensionCount; ++i) {
    ingestAggregatorFactories.add(
        new LongSumAggregatorFactory(StringUtils.format("sumResult%s", i), StringUtils.format("Dim_%s", i))
    );
    ingestAggregatorFactories.add(
        new DoubleSumAggregatorFactory(StringUtils.format("doubleSumResult%s", i), StringUtils.format("Dim_%s", i))
    );
  }
  final ArrayList<AggregatorFactory> queryAggregatorFactories = new ArrayList<>(dimensionCount + 1);
  queryAggregatorFactories.add(new CountAggregatorFactory("rows"));
  for (int i = 0; i < dimensionCount; ++i) {
    queryAggregatorFactories.add(
        new LongSumAggregatorFactory(StringUtils.format("sumResult%s", i), StringUtils.format("sumResult%s", i))
    );
    queryAggregatorFactories.add(
        new DoubleSumAggregatorFactory(StringUtils.format("doubleSumResult%s", i), StringUtils.format("doubleSumResult%s", i))
    );
  }
  final IncrementalIndex index = indexCreator.createIndex((Object) ingestAggregatorFactories.toArray(new AggregatorFactory[0]));
  final int concurrentThreads = 2;
  final int elementsPerThread = 10_000;
  final ListeningExecutorService indexExecutor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          concurrentThreads,
          new ThreadFactoryBuilder()
              .setDaemon(false)
              .setNameFormat("index-executor-%d")
              .setPriority(Thread.MIN_PRIORITY)
              .build()
      )
  );
  final ListeningExecutorService queryExecutor = MoreExecutors.listeningDecorator(
      Executors.newFixedThreadPool(
          concurrentThreads,
          new ThreadFactoryBuilder()
              .setDaemon(false)
              .setNameFormat("query-executor-%d")
              .build()
      )
  );
  final long timestamp = System.currentTimeMillis();
  final Interval queryInterval = Intervals.of("1900-01-01T00:00:00Z/2900-01-01T00:00:00Z");
  final List<ListenableFuture<?>> indexFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
  final List<ListenableFuture<?>> queryFutures = Lists.newArrayListWithExpectedSize(concurrentThreads);
  final Segment incrementalIndexSegment = new IncrementalIndexSegment(index, null);
  final QueryRunnerFactory factory = new TimeseriesQueryRunnerFactory(
      new TimeseriesQueryQueryToolChest(),
      new TimeseriesQueryEngine(),
      QueryRunnerTestHelper.NOOP_QUERYWATCHER
  );
  final AtomicInteger currentlyRunning = new AtomicInteger(0);
  final AtomicInteger concurrentlyRan = new AtomicInteger(0);
  final AtomicInteger someoneRan = new AtomicInteger(0);
  final CountDownLatch startLatch = new CountDownLatch(1);
  final CountDownLatch readyLatch = new CountDownLatch(concurrentThreads * 2);
  final AtomicInteger queriesAccumualted = new AtomicInteger(0);
  for (int j = 0; j < concurrentThreads; j++) {
    indexFutures.add(indexExecutor.submit(new Runnable() {
      @Override
      public void run() {
        readyLatch.countDown();
        try {
          startLatch.await();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new RuntimeException(e);
        }
        currentlyRunning.incrementAndGet();
        try {
          for (int i = 0; i < elementsPerThread; i++) {
            index.add(getLongRow(timestamp + i, dimensionCount));
            someoneRan.incrementAndGet();
          }
        } catch (IndexSizeExceededException e) {
          throw new RuntimeException(e);
        }
        currentlyRunning.decrementAndGet();
      }
    }));
    final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
        .dataSource("xxx")
        .granularity(Granularities.ALL)
        .intervals(ImmutableList.of(queryInterval))
        .aggregators(queryAggregatorFactories)
        .build();
    queryFutures.add(queryExecutor.submit(new Runnable() {
      @Override
      public void run() {
        readyLatch.countDown();
        try {
          startLatch.await();
        } catch (InterruptedException e) {
          Thread.currentThread().interrupt();
          throw new RuntimeException(e);
        }
        while (concurrentlyRan.get() == 0) {
          QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
              factory.createRunner(incrementalIndexSegment),
              factory.getToolchest()
          );
          Sequence<Result<TimeseriesResultValue>> sequence = runner.run(QueryPlus.wrap(query));
          Double[] results = sequence.accumulate(
              new Double[0],
              new Accumulator<Double[], Result<TimeseriesResultValue>>() {
                @Override
                public Double[] accumulate(Double[] accumulated, Result<TimeseriesResultValue> in) {
                  if (currentlyRunning.get() > 0) {
                    concurrentlyRan.incrementAndGet();
                  }
                  queriesAccumualted.incrementAndGet();
                  return Lists.asList(in.getValue().getDoubleMetric("doubleSumResult0"), accumulated).toArray(new Double[0]);
                }
              }
          );
          for (Double result : results) {
            final Integer maxValueExpected = someoneRan.get() + concurrentThreads;
            if (maxValueExpected > 0) {
              // Eventually consistent, but should be somewhere in that range.
              // Actual result is validated after all writes are guaranteed done.
              Assert.assertTrue(
                  StringUtils.format("%d >= %g >= 0 violated", maxValueExpected, result),
                  result >= 0 && result <= maxValueExpected
              );
            }
          }
        }
      }
    }));
  }
  readyLatch.await();
  startLatch.countDown();
  List<ListenableFuture<?>> allFutures = new ArrayList<>(queryFutures.size() + indexFutures.size());
  allFutures.addAll(queryFutures);
  allFutures.addAll(indexFutures);
  Futures.allAsList(allFutures).get();
  Assert.assertTrue("Queries ran too fast", queriesAccumualted.get() > 0);
  Assert.assertTrue("Did not hit concurrency, please try again", concurrentlyRan.get() > 0);
  queryExecutor.shutdown();
  indexExecutor.shutdown();
  QueryRunner<Result<TimeseriesResultValue>> runner = new FinalizeResultsQueryRunner<Result<TimeseriesResultValue>>(
      factory.createRunner(incrementalIndexSegment),
      factory.getToolchest()
  );
  TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
      .dataSource("xxx")
      .granularity(Granularities.ALL)
      .intervals(ImmutableList.of(queryInterval))
      .aggregators(queryAggregatorFactories)
      .build();
  List<Result<TimeseriesResultValue>> results = runner.run(QueryPlus.wrap(query)).toList();
  boolean isRollup = index.isRollup();
  for (Result<TimeseriesResultValue> result : results) {
    Assert.assertEquals(
        elementsPerThread * (isRollup ? 1 : concurrentThreads),
        result.getValue().getLongMetric("rows").intValue()
    );
    for (int i = 0; i < dimensionCount; ++i) {
      Assert.assertEquals(
          StringUtils.format("Failed long sum on dimension %d", i),
          elementsPerThread * concurrentThreads,
          result.getValue().getLongMetric(StringUtils.format("sumResult%s", i)).intValue()
      );
      Assert.assertEquals(
          StringUtils.format("Failed double sum on dimension %d", i),
          elementsPerThread * concurrentThreads,
          result.getValue().getDoubleMetric(StringUtils.format("doubleSumResult%s", i)).intValue()
      );
    }
  }
}
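This test also depends on a getLongRow(timestamp, dimensionCount) helper that the excerpt omits. For the final sums to equal elementsPerThread * concurrentThreads, each row must contribute the value 1 in every Dim_N column; a plausible sketch follows, not the verbatim Druid helper.

private static MapBasedInputRow getLongRow(long timestamp, int dimensionCount) {
  // Each dimension holds the long value 1, so the LongSum/DoubleSum aggregators
  // accumulate exactly one unit per ingested row. (Sketch only.)
  Map<String, Object> event = new HashMap<>();
  for (int i = 0; i < dimensionCount; i++) {
    event.put(StringUtils.format("Dim_%s", i), 1L);
  }
  return new MapBasedInputRow(timestamp, new ArrayList<>(event.keySet()), event);
}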