Use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
The class GroupByRowProcessor, method process.
/**
* Processes the input sequence "rows" (output by "subquery") based on "query" and returns a {@link ResultSupplier}.
*
* In addition to grouping using dimensions and metrics, it will also apply filters (both DimFilter and interval
* filters).
*
* The input sequence is processed synchronously with the call to this method, and result iteration happens lazily upon
* calls to the {@link ResultSupplier}. Make sure to close it when you're done.
*/
public static ResultSupplier process(
    final GroupByQuery query,
    final GroupByQuery subquery,
    final Sequence<ResultRow> rows,
    final GroupByQueryConfig config,
    final GroupByQueryResource resource,
    final ObjectMapper spillMapper,
    final String processingTmpDir,
    final int mergeBufferSize
) {
final Closer closeOnExit = Closer.create();
final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
final File temporaryStorageDirectory = new File(processingTmpDir, StringUtils.format("druid-groupBy-%s_%s", UUID.randomUUID(), query.getId()));
final LimitedTemporaryStorage temporaryStorage = new LimitedTemporaryStorage(temporaryStorageDirectory, querySpecificConfig.getMaxOnDiskStorage());
closeOnExit.register(temporaryStorage);
Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair = RowBasedGrouperHelper.createGrouperAccumulatorPair(
    query,
    subquery,
    querySpecificConfig,
    new Supplier<ByteBuffer>() {
      @Override
      public ByteBuffer get() {
        final ResourceHolder<ByteBuffer> mergeBufferHolder = resource.getMergeBuffer();
        closeOnExit.register(mergeBufferHolder);
        return mergeBufferHolder.get();
      }
    },
    temporaryStorage,
    spillMapper,
    mergeBufferSize
);
final Grouper<RowBasedKey> grouper = pair.lhs;
final Accumulator<AggregateResult, ResultRow> accumulator = pair.rhs;
closeOnExit.register(grouper);
final AggregateResult retVal = rows.accumulate(AggregateResult.ok(), accumulator);
if (!retVal.isOk()) {
throw new ResourceLimitExceededException(retVal.getReason());
}
return new ResultSupplier() {
@Override
public Sequence<ResultRow> results(@Nullable List<DimensionSpec> dimensionsToInclude) {
return getRowsFromGrouper(query, grouper, dimensionsToInclude);
}
@Override
public void close() throws IOException {
closeOnExit.close();
}
};
}
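The javadoc's closing caveat is the important part of this contract: the ResultSupplier holds on to the merge buffer and any spilled data until it is closed. A minimal caller-side sketch, assuming the query, subquery, input sequence, and resources have already been constructed elsewhere (the variable names mergeBufferResource and tmpDirPath are illustrative, and ResultSupplier is assumed to be a Closeable, as its close() signature suggests):
// Caller-side sketch, not taken from the Druid source.
try (ResultSupplier resultSupplier = GroupByRowProcessor.process(
    query,
    subquery,
    rows,
    config,
    mergeBufferResource,
    spillMapper,
    tmpDirPath,
    mergeBufferSize)) {
  // Passing null for dimensionsToInclude is assumed to mean "keep all output columns".
  final Sequence<ResultRow> results = resultSupplier.results(null);
  // ... lazily iterate or accumulate the sequence here ...
}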
Use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
The class MapVirtualColumnGroupByTest, method setup.
@Before
public void setup() throws IOException {
final IncrementalIndex incrementalIndex = MapVirtualColumnTestBase.generateIndex();
final GroupByStrategySelector strategySelector = new GroupByStrategySelector(
    GroupByQueryConfig::new,
    null,
    new GroupByStrategyV2(
        new DruidProcessingConfig() {
@Override
public String getFormatString() {
return null;
}
@Override
public int intermediateComputeSizeBytes() {
return 10 * 1024 * 1024;
}
@Override
public int getNumMergeBuffers() {
return 1;
}
@Override
public int getNumThreads() {
return 1;
}
        },
        GroupByQueryConfig::new,
        new StupidPool<>("map-virtual-column-groupby-test", () -> ByteBuffer.allocate(1024)),
        new DefaultBlockingPool<>(() -> ByteBuffer.allocate(1024), 1),
        new DefaultObjectMapper(),
        QueryRunnerTestHelper.NOOP_QUERYWATCHER
    )
);
final GroupByQueryRunnerFactory factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector));
runner = QueryRunnerTestHelper.makeQueryRunner(factory, SegmentId.dummy("index"), new IncrementalIndexSegment(incrementalIndex, SegmentId.dummy("index")), "incremental");
}
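With the runner registered, a test can submit a groupBy query directly against the "index" segment. A rough sketch of the usual pattern, assuming the runner field is a QueryRunner<ResultRow>; the dimension and aggregator names are placeholders rather than columns from MapVirtualColumnTestBase:
// Illustrative query; column names are placeholders.
final GroupByQuery query = GroupByQuery.builder()
    .setDataSource("index")
    .setQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.of("2011/2012"))))
    .setGranularity(Granularities.ALL)
    .setDimensions(new DefaultDimensionSpec("dim", "dim"))
    .setAggregatorSpecs(new CountAggregatorFactory("count"))
    .build();
final List<ResultRow> results = runner.run(QueryPlus.wrap(query)).toList();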
Use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
The class FixedBucketsHistogramGroupByQueryTest, method constructorFeeder.
@Parameterized.Parameters(name = "{0}")
public static Iterable<Object[]> constructorFeeder() {
final GroupByQueryConfig v1Config = new GroupByQueryConfig() {
@Override
public String getDefaultStrategy() {
return GroupByStrategySelector.STRATEGY_V1;
}
@Override
public String toString() {
return "v1";
}
};
final GroupByQueryConfig v1SingleThreadedConfig = new GroupByQueryConfig() {
@Override
public boolean isSingleThreaded() {
return true;
}
@Override
public String getDefaultStrategy() {
return GroupByStrategySelector.STRATEGY_V1;
}
@Override
public String toString() {
return "v1SingleThreaded";
}
};
final GroupByQueryConfig v2Config = new GroupByQueryConfig() {
@Override
public String getDefaultStrategy() {
return GroupByStrategySelector.STRATEGY_V2;
}
@Override
public String toString() {
return "v2";
}
};
v1Config.setMaxIntermediateRows(10000);
v1SingleThreadedConfig.setMaxIntermediateRows(10000);
final List<Object[]> constructors = new ArrayList<>();
final List<GroupByQueryConfig> configs = ImmutableList.of(v1Config, v1SingleThreadedConfig, v2Config);
for (GroupByQueryConfig config : configs) {
final Pair<GroupByQueryRunnerFactory, Closer> factoryAndCloser = GroupByQueryRunnerTest.makeQueryRunnerFactory(config);
final GroupByQueryRunnerFactory factory = factoryAndCloser.lhs;
RESOURCE_CLOSER.register(factoryAndCloser.rhs);
for (QueryRunner<ResultRow> runner : QueryRunnerTestHelper.makeQueryRunners(factory)) {
final String testName = StringUtils.format("config=%s, runner=%s", config.toString(), runner.toString());
constructors.add(new Object[] { testName, factory, runner });
}
}
return constructors;
}
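Each Object[] produced here is handed to the test's constructor by JUnit, with element {0} (the formatted name) serving only as the display name declared in @Parameterized.Parameters. A sketch of the matching constructor, with illustrative field names:
// Matching constructor for the {testName, factory, runner} tuples built above.
public FixedBucketsHistogramGroupByQueryTest(
    String testName,
    GroupByQueryRunnerFactory factory,
    QueryRunner<ResultRow> runner
) {
  // testName is only used by JUnit for the "{0}" display name.
  this.factory = factory;
  this.runner = runner;
}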
Use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
The class CachingClusteredClientBenchmark, method setup.
@Setup(Level.Trial)
public void setup() {
final String schemaName = "basic";
parallelCombine = parallelism > 0;
GeneratorSchemaInfo schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
Map<DataSegment, QueryableIndex> queryableIndexes = Maps.newHashMapWithExpectedSize(numServers);
for (int i = 0; i < numServers; i++) {
final DataSegment dataSegment = DataSegment.builder()
    .dataSource(DATA_SOURCE)
    .interval(schemaInfo.getDataInterval())
    .version("1")
    .shardSpec(new LinearShardSpec(i))
    .size(0)
    .build();
final SegmentGenerator segmentGenerator = closer.register(new SegmentGenerator());
LOG.info("Starting benchmark setup using cacheDir[%s], rows[%,d].", segmentGenerator.getCacheDir(), rowsPerSegment);
final QueryableIndex index = segmentGenerator.generate(dataSegment, schemaInfo, Granularities.NONE, rowsPerSegment);
queryableIndexes.put(dataSegment, index);
}
final DruidProcessingConfig processingConfig = new DruidProcessingConfig() {
@Override
public String getFormatString() {
return null;
}
@Override
public int intermediateComputeSizeBytes() {
return PROCESSING_BUFFER_SIZE;
}
@Override
public int getNumMergeBuffers() {
return 1;
}
@Override
public int getNumThreads() {
return numProcessingThreads;
}
@Override
public boolean useParallelMergePool() {
return true;
}
};
conglomerate = new DefaultQueryRunnerFactoryConglomerate(
    ImmutableMap.<Class<? extends Query>, QueryRunnerFactory>builder()
        .put(
            TimeseriesQuery.class,
            new TimeseriesQueryRunnerFactory(
                new TimeseriesQueryQueryToolChest(),
                new TimeseriesQueryEngine(),
                QueryRunnerTestHelper.NOOP_QUERYWATCHER
            )
        )
        .put(
            TopNQuery.class,
            new TopNQueryRunnerFactory(
                new StupidPool<>(
                    "TopNQueryRunnerFactory-bufferPool",
                    () -> ByteBuffer.allocate(PROCESSING_BUFFER_SIZE)
                ),
                new TopNQueryQueryToolChest(new TopNQueryConfig()),
                QueryRunnerTestHelper.NOOP_QUERYWATCHER
            )
        )
        .put(
            GroupByQuery.class,
            makeGroupByQueryRunnerFactory(
                GroupByQueryRunnerTest.DEFAULT_MAPPER,
                new GroupByQueryConfig() {
                  @Override
                  public String getDefaultStrategy() {
                    return GroupByStrategySelector.STRATEGY_V2;
                  }
                },
                processingConfig
            )
        )
        .build()
);
toolChestWarehouse = new QueryToolChestWarehouse() {
@Override
public <T, QueryType extends Query<T>> QueryToolChest<T, QueryType> getToolChest(final QueryType query) {
return conglomerate.findFactory(query).getToolchest();
}
};
SimpleServerView serverView = new SimpleServerView();
int serverSuffx = 1;
for (Entry<DataSegment, QueryableIndex> entry : queryableIndexes.entrySet()) {
serverView.addServer(createServer(serverSuffx++), entry.getKey(), entry.getValue());
}
processingPool = Execs.multiThreaded(processingConfig.getNumThreads(), "caching-clustered-client-benchmark");
forkJoinPool = new ForkJoinPool((int) Math.ceil(Runtime.getRuntime().availableProcessors() * 0.75), ForkJoinPool.defaultForkJoinWorkerThreadFactory, null, true);
cachingClusteredClient = new CachingClusteredClient(
    toolChestWarehouse,
    serverView,
    MapCache.create(0),
    JSON_MAPPER,
    new ForegroundCachePopulator(JSON_MAPPER, new CachePopulatorStats(), 0),
    new CacheConfig(),
    new DruidHttpClientConfig(),
    processingConfig,
    forkJoinPool,
    QueryStackTests.DEFAULT_NOOP_SCHEDULER,
    new MapJoinableFactory(ImmutableSet.of(), ImmutableMap.of()),
    new NoopServiceEmitter()
);
}
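Once setup() completes, benchmark methods push queries through cachingClusteredClient, which acts as a QuerySegmentWalker over the simulated servers. A rough sketch of that path; the timeseries query and its aggregator are illustrative, and the real benchmark additionally merges results through the toolchest:
// Illustrative only: aggregator and interval are not taken from the benchmark.
final TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
    .dataSource(DATA_SOURCE)
    .intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Intervals.ETERNITY)))
    .granularity(Granularities.ALL)
    .aggregators(ImmutableList.of(new CountAggregatorFactory("rows")))
    .build();
final Sequence<Result<TimeseriesResultValue>> results =
    QueryPlus.wrap(query).run(cachingClusteredClient, ResponseContext.createEmpty());
final List<Result<TimeseriesResultValue>> rows = results.toList();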
Use of org.apache.druid.query.groupby.GroupByQueryConfig in project druid by druid-io.
The class GroupByTypeInterfaceBenchmark, method setup.
@Setup(Level.Trial)
public void setup() throws IOException {
log.info("SETUP CALLED AT %d", System.currentTimeMillis());
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
setupQueries();
String schemaName = "basic";
schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
stringQuery = SCHEMA_QUERY_MAP.get(schemaName).get("string");
longFloatQuery = SCHEMA_QUERY_MAP.get(schemaName).get("longFloat");
longQuery = SCHEMA_QUERY_MAP.get(schemaName).get("long");
floatQuery = SCHEMA_QUERY_MAP.get(schemaName).get("float");
final DataGenerator dataGenerator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + 1, schemaInfo.getDataInterval(), rowsPerSegment);
tmpDir = FileUtils.createTempDir();
log.info("Using temp dir: %s", tmpDir.getAbsolutePath());
// queryableIndexes -> numSegments worth of on-disk segments
// anIncrementalIndex -> the last incremental index
anIncrementalIndex = null;
queryableIndexes = new ArrayList<>(numSegments);
for (int i = 0; i < numSegments; i++) {
log.info("Generating rows for segment %d/%d", i + 1, numSegments);
final IncrementalIndex index = makeIncIndex();
for (int j = 0; j < rowsPerSegment; j++) {
final InputRow row = dataGenerator.nextRow();
if (j % 20000 == 0) {
log.info("%,d/%,d rows generated.", i * rowsPerSegment + j, rowsPerSegment * numSegments);
}
index.add(row);
}
log.info("%,d/%,d rows generated, persisting segment %d/%d.", (i + 1) * rowsPerSegment, rowsPerSegment * numSegments, i + 1, numSegments);
final File file = INDEX_MERGER_V9.persist(index, new File(tmpDir, String.valueOf(i)), new IndexSpec(), null);
queryableIndexes.add(INDEX_IO.loadIndex(file));
if (i == numSegments - 1) {
anIncrementalIndex = index;
} else {
index.close();
}
}
NonBlockingPool<ByteBuffer> bufferPool = new StupidPool<>("GroupByBenchmark-computeBufferPool", new OffheapBufferGenerator("compute", 250_000_000), 0, Integer.MAX_VALUE);
// limit of 2 is required since we simulate both historical merge and broker merge in the same process
BlockingPool<ByteBuffer> mergePool = new DefaultBlockingPool<>(new OffheapBufferGenerator("merge", 250_000_000), 2);
final GroupByQueryConfig config = new GroupByQueryConfig() {
@Override
public String getDefaultStrategy() {
return defaultStrategy;
}
@Override
public int getBufferGrouperInitialBuckets() {
return initialBuckets;
}
@Override
public long getMaxOnDiskStorage() {
return 1_000_000_000L;
}
};
config.setSingleThreaded(false);
config.setMaxIntermediateRows(Integer.MAX_VALUE);
config.setMaxResults(Integer.MAX_VALUE);
DruidProcessingConfig druidProcessingConfig = new DruidProcessingConfig() {
@Override
public int getNumThreads() {
// Used by "v2" strategy for concurrencyHint
return numProcessingThreads;
}
@Override
public String getFormatString() {
return null;
}
};
final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
final GroupByStrategySelector strategySelector = new GroupByStrategySelector(
    configSupplier,
    new GroupByStrategyV1(
        configSupplier,
        new GroupByQueryEngine(configSupplier, bufferPool),
        QueryBenchmarkUtil.NOOP_QUERYWATCHER
    ),
    new GroupByStrategyV2(
        druidProcessingConfig,
        configSupplier,
        bufferPool,
        mergePool,
        new ObjectMapper(new SmileFactory()),
        QueryBenchmarkUtil.NOOP_QUERYWATCHER
    )
);
factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector));
}
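The factory produced at the end of setup() is what the benchmark methods drive. A rough sketch of running the stringQuery prepared in setupQueries() against the first persisted segment, following the common pattern of wrapping the per-segment runner with the toolchest's merge and finalize steps (raw types keep the generics noise down; this sketch is not copied from the benchmark's own helper):
// Sketch only: mirrors the usual "wrap with toolchest, then run" pattern.
@SuppressWarnings({"unchecked", "rawtypes"})
private List<ResultRow> runStringQueryOnFirstSegment() {
  final QueryableIndexSegment segment =
      new QueryableIndexSegment(queryableIndexes.get(0), SegmentId.dummy("qIndex"));
  final QueryToolChest toolChest = factory.getToolchest();
  final QueryRunner<ResultRow> theRunner = new FinalizeResultsQueryRunner<>(
      toolChest.mergeResults(factory.createRunner(segment)),
      toolChest
  );
  return theRunner.run(QueryPlus.wrap(stringQuery)).toList();
}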