Use of org.apache.druid.client.cache.ForegroundCachePopulator in project druid by druid-io.
The class CachingClusteredClientBenchmark, method setup.
@Setup(Level.Trial)
public void setup() {
  final String schemaName = "basic";
  parallelCombine = parallelism > 0;
  GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
  Map<DataSegment, QueryableIndex> queryableIndexes = Maps.newHashMapWithExpectedSize(numServers);
  for (int i = 0; i < numServers; i++) {
    final DataSegment dataSegment = DataSegment.builder()
        .dataSource(DATA_SOURCE)
        .interval(schemaInfo.getDataInterval())
        .version("1")
        .shardSpec(new LinearShardSpec(i))
        .size(0)
        .build();
    final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
    LOG.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment);
    final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment);
    queryableIndexes.put(dataSegment, index);
  }
  final DruidProcessingConfig processingConfig = new DruidProcessingConfig() {
    @Override
    public String getFormatString() {
      return null;
    }

    @Override
    public int intermediateComputeSizeBytes() {
      return PROCESSING_BUFFER_SIZE;
    }

    @Override
    public int getNumMergeBuffers() {
      return 1;
    }

    @Override
    public int getNumThreads() {
      return numProcessingThreads;
    }

    @Override
    public boolean useParallelMergePool() {
      return true;
    }
  };
  conglomerate = new DefaultQueryRunnerFactoryConglomerate(
      ImmutableMap.<Class<? extends Query>, QueryRunnerFactory>builder()
          .put(
              TimeseriesQuery.class,
              new TimeseriesQueryRunnerFactory(
                  new TimeseriesQueryQueryToolChest(),
                  new TimeseriesQueryEngine(),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(
              TopNQuery.class,
              new TopNQueryRunnerFactory(
                  new StupidPool<>("TopNQueryRunnerFactory-bufferPool", () -> ByteBuffer.allocate(PROCESSING_BUFFER_SIZE)),
                  new TopNQueryQueryToolChest(new TopNQueryConfig()),
                  QueryRunnerTestHelper.NOOP_QUERYWATCHER
              )
          )
          .put(
              GroupByQuery.class,
              makeGroupByQueryRunnerFactory(
                  GroupByQueryRunnerTest.DEFAULT_MAPPER,
                  new GroupByQueryConfig() {
                    @Override
                    public String getDefaultStrategy() {
                      return GroupByStrategySelector.STRATEGY_V2;
                    }
                  },
                  processingConfig
              )
          )
          .build()
  );
  toolChestWarehouse = new QueryToolChestWarehouse() {
    @Override
    public <T, QueryType extends Query<T>> QueryToolChest<T, QueryType> getToolChest(final QueryType query) {
      return conglomerate.findFactory(query).getToolchest();
    }
  };
  SimpleServerView serverView = new SimpleServerView();
  int serverSuffix = 1;
  for (Entry<DataSegment, QueryableIndex> entry : queryableIndexes.entrySet()) {
    serverView.addServer(createServer(serverSuffix++), entry.getKey(), entry.getValue());
  }
  processingPool = Execs.multiThreaded(processingConfig.getNumThreads(), "caching-clustered-client-benchmark");
  forkJoinPool = new ForkJoinPool(
      (int) Math.ceil(Runtime.getRuntime().availableProcessors() * 0.75),
      ForkJoinPool.defaultForkJoinWorkerThreadFactory,
      null,
      true
  );
  cachingClusteredClient = new CachingClusteredClient(
      toolChestWarehouse,
      serverView,
      MapCache.create(0),
      JSON_MAPPER,
      new ForegroundCachePopulator(JSON_MAPPER, new CachePopulatorStats(), 0),
      new CacheConfig(),
      new DruidHttpClientConfig(),
      processingConfig,
      forkJoinPool,
      QueryStackTests.DEFAULT_NOOP_SCHEDULER,
      new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
      new NoopServiceEmitter()
  );
}
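The benchmark above passes a ForegroundCachePopulator built from the JSON mapper, a fresh CachePopulatorStats, and a maxEntrySize of 0; a non-positive maxEntrySize disables the per-entry size limit. To make the populator's contract concrete, here is a minimal, hypothetical sketch of using it directly (not from the Druid source; the key name and values are made up). wrap() returns a Sequence that serializes each element with the supplied function and writes the accumulated bytes to the cache once the sequence has been fully consumed:

// Hypothetical sketch of direct ForegroundCachePopulator usage; names and values are illustrative.
ObjectMapper objectMapper = new DefaultObjectMapper();
CachePopulator populator = new ForegroundCachePopulator(objectMapper, new CachePopulatorStats(), 0);
Cache cache = MapCache.create(1024 * 1024);
Cache.NamedKey key = new Cache.NamedKey("example-segment", new byte[]{0x01}); // made-up key
Sequence<String> results = populator.wrap(
    Sequences.simple(ImmutableList.of("row1", "row2")), // stand-in for query results
    value -> value,                                     // cacheFn: maps a result to its cacheable form
    cache,
    key
);
results.toList(); // the cache entry is written as a side effect of consuming the sequence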
Use of org.apache.druid.client.cache.ForegroundCachePopulator in project druid by druid-io.
The class SinkQuerySegmentWalker, method getQueryRunnerForSegments.
@Override
public <T> QueryRunner<T> getQueryRunnerForSegments(final Query<T> query, final Iterable<SegmentDescriptor> specs) {
  // We only handle one particular dataSource. Make sure that's what we have, then ignore from here on out.
  final DataSourceAnalysis analysis = DataSourceAnalysis.forDataSource(query.getDataSource());

  // Sanity check: make sure the query is based on the table we're meant to handle.
  if (!analysis.getBaseTableDataSource().filter(ds -> dataSource.equals(ds.getName())).isPresent()) {
    throw new ISE("Cannot handle datasource: %s", analysis.getDataSource());
  }

  final QueryRunnerFactory<T, Query<T>> factory = conglomerate.findFactory(query);
  if (factory == null) {
    throw new ISE("Unknown query type[%s].", query.getClass());
  }

  final QueryToolChest<T, Query<T>> toolChest = factory.getToolchest();
  final boolean skipIncrementalSegment = query.getContextValue(CONTEXT_SKIP_INCREMENTAL_SEGMENT, false);
  final AtomicLong cpuTimeAccumulator = new AtomicLong(0L);

  // Make sure this query type can handle the subquery, if present.
  if (analysis.isQuery() && !toolChest.canPerformSubquery(((QueryDataSource) analysis.getDataSource()).getQuery())) {
    throw new ISE("Cannot handle subquery: %s", analysis.getDataSource());
  }

  // segmentMapFn maps each base Segment into a joined Segment if necessary.
  final Function<SegmentReference, SegmentReference> segmentMapFn = joinableFactoryWrapper.createSegmentMapFn(
      analysis.getJoinBaseTableFilter().map(Filters::toFilter).orElse(null),
      analysis.getPreJoinableClauses(),
      cpuTimeAccumulator,
      analysis.getBaseQuery().orElse(query)
  );

  // We compute the join cache key here so it doesn't need to be re-computed for every segment.
  final Optional<byte[]> cacheKeyPrefix = analysis.isJoin()
      ? joinableFactoryWrapper.computeJoinDataSourceCacheKey(analysis)
      : Optional.of(StringUtils.EMPTY_BYTES);

  Iterable<QueryRunner<T>> perSegmentRunners = Iterables.transform(
      specs,
      descriptor -> {
        final PartitionChunk<Sink> chunk = sinkTimeline.findChunk(
            descriptor.getInterval(),
            descriptor.getVersion(),
            descriptor.getPartitionNumber()
        );
        if (chunk == null) {
          return new ReportTimelineMissingSegmentQueryRunner<>(descriptor);
        }

        final Sink theSink = chunk.getObject();
        final SegmentId sinkSegmentId = theSink.getSegment().getId();

        Iterable<QueryRunner<T>> perHydrantRunners = new SinkQueryRunners<>(
            Iterables.transform(
                theSink,
                hydrant -> {
                  // Hydrant might swap at any point, but if it's swapped at the start
                  // then we know it's *definitely* swapped.
                  final boolean hydrantDefinitelySwapped = hydrant.hasSwapped();

                  if (skipIncrementalSegment && !hydrantDefinitelySwapped) {
                    return new Pair<>(hydrant.getSegmentDataInterval(), new NoopQueryRunner<>());
                  }

                  // Prevent the underlying segment from swapping while it's being iterated.
                  final Optional<Pair<SegmentReference, Closeable>> maybeSegmentAndCloseable =
                      hydrant.getSegmentForQuery(segmentMapFn);

                  // If the Optional isn't present, we failed to acquire a reference to the segment or any joinables.
                  if (!maybeSegmentAndCloseable.isPresent()) {
                    return new Pair<>(hydrant.getSegmentDataInterval(), new ReportTimelineMissingSegmentQueryRunner<>(descriptor));
                  }

                  final Pair<SegmentReference, Closeable> segmentAndCloseable = maybeSegmentAndCloseable.get();
                  try {
                    QueryRunner<T> runner = factory.createRunner(segmentAndCloseable.lhs);

                    // 1) Only use caching if data is immutable
                    // 2) Hydrants are not the same between replicas, make sure cache is local
                    if (hydrantDefinitelySwapped && cache.isLocal()) {
                      StorageAdapter storageAdapter = segmentAndCloseable.lhs.asStorageAdapter();
                      long segmentMinTime = storageAdapter.getMinTime().getMillis();
                      long segmentMaxTime = storageAdapter.getMaxTime().getMillis();
                      Interval actualDataInterval = Intervals.utc(segmentMinTime, segmentMaxTime + 1);
                      runner = new CachingQueryRunner<>(
                          makeHydrantCacheIdentifier(hydrant),
                          cacheKeyPrefix,
                          descriptor,
                          actualDataInterval,
                          objectMapper,
                          cache,
                          toolChest,
                          runner,
                          // Always populate in foreground regardless of config
                          new ForegroundCachePopulator(objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize()),
                          cacheConfig
                      );
                    }

                    // Make it always use Closeable to decrement()
                    runner = QueryRunnerHelper.makeClosingQueryRunner(runner, segmentAndCloseable.rhs);
                    return new Pair<>(segmentAndCloseable.lhs.getDataInterval(), runner);
                  } catch (Throwable e) {
                    throw CloseableUtils.closeAndWrapInCatch(e, segmentAndCloseable.rhs);
                  }
                }
            )
        );
        return new SpecificSegmentQueryRunner<>(
            withPerSinkMetrics(
                new BySegmentQueryRunner<>(
                    sinkSegmentId,
                    descriptor.getInterval().getStart(),
                    factory.mergeRunners(DirectQueryProcessingPool.INSTANCE, perHydrantRunners)
                ),
                toolChest,
                sinkSegmentId,
                cpuTimeAccumulator
            ),
            new SpecificSegmentSpec(descriptor)
        );
      }
  );
  final QueryRunner<T> mergedRunner = toolChest.mergeResults(factory.mergeRunners(queryProcessingPool, perSegmentRunners));
  return CPUTimeMetricQueryRunner.safeBuild(
      new FinalizeResultsQueryRunner<>(mergedRunner, toolChest),
      toolChest,
      emitter,
      cpuTimeAccumulator,
      true
  );
}
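In this realtime path the populator is constructed inline and is always the foreground variant, regardless of how the server is configured, per the "Always populate in foreground" comment above. Elsewhere the populator is typically chosen from CacheConfig: a positive druid.cache.numBackgroundThreads selects background population. The following sketch mirrors that selection logic under stated assumptions (the thread-name format is made up, and the exact wiring in Druid's Guice modules may differ):

// Sketch: choosing a CachePopulator from CacheConfig. A positive numBackgroundThreads
// selects background population (serialization on a separate executor); otherwise the
// results are serialized on the query thread, as ForegroundCachePopulator does.
CachePopulator populator;
if (cacheConfig.getNumBackgroundThreads() > 0) {
  ListeningExecutorService exec = MoreExecutors.listeningDecorator(
      Execs.multiThreaded(cacheConfig.getNumBackgroundThreads(), "background-cache-populator-%d") // made-up name
  );
  populator = new BackgroundCachePopulator(exec, objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize());
} else {
  populator = new ForegroundCachePopulator(objectMapper, cachePopulatorStats, cacheConfig.getMaxEntrySize());
}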
Use of org.apache.druid.client.cache.ForegroundCachePopulator in project druid by druid-io.
The class CachingClusteredClientFunctionalityTest, method setUp.
@Before
public void setUp() {
  timeline = new VersionedIntervalTimeline<>(Ordering.natural());
  serverView = EasyMock.createNiceMock(TimelineServerView.class);
  cache = MapCache.create(100000);
  client = makeClient(new ForegroundCachePopulator(OBJECT_MAPPER, new CachePopulatorStats(), -1));
}
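One hedged aside on the arguments used here: OBJECT_MAPPER serializes cached values, the stats object counts successful, failed, and oversized population attempts (the counters Druid's cache monitor emits as metrics), and -1 again disables the entry-size limit. A hypothetical follow-up check, not part of the original test, might look like this:

// Hypothetical follow-up: inspect the populator's counters after queries have run.
CachePopulatorStats stats = new CachePopulatorStats();
CachePopulator populator = new ForegroundCachePopulator(OBJECT_MAPPER, stats, -1);
// ... run queries through makeClient(populator) and consume their results ...
CachePopulatorStats.Snapshot snapshot = stats.snapshot();
Assert.assertEquals(0, snapshot.getNumOversized()); // maxEntrySize <= 0 disables the size limit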