Search in sources :

Example 6 with GroupByQueryEngine

use of org.apache.druid.query.groupby.GroupByQueryEngine in project druid by apache.

the class IncrementalIndexStorageAdapterTest method testObjectColumnSelectorOnVaryingColumnSchema.

@Test
public void testObjectColumnSelectorOnVaryingColumnSchema() throws Exception {
    IncrementalIndex index = indexCreator.createIndex();
    index.add(new MapBasedInputRow(DateTimes.of("2014-09-01T00:00:00"), Collections.singletonList("billy"), ImmutableMap.of("billy", "hi")));
    index.add(new MapBasedInputRow(DateTimes.of("2014-09-01T01:00:00"), Lists.newArrayList("billy", "sally"), ImmutableMap.of("billy", "hip", "sally", "hop")));
    try (CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>("GroupByQueryEngine-bufferPool", () -> ByteBuffer.allocate(50000))) {
        final GroupByQueryEngine engine = new GroupByQueryEngine(Suppliers.ofInstance(new GroupByQueryConfig() {

            @Override
            public int getMaxIntermediateRows() {
                return 5;
            }
        }), pool);
        final Sequence<Row> rows = engine.process(GroupByQuery.builder().setDataSource("test").setGranularity(Granularities.ALL).setInterval(new Interval(DateTimes.EPOCH, DateTimes.nowUtc())).addDimension("billy").addDimension("sally").addAggregator(new LongSumAggregatorFactory("cnt", "cnt")).addAggregator(new JavaScriptAggregatorFactory("fieldLength", Arrays.asList("sally", "billy"), "function(current, s, b) { return current + (s == null ? 0 : s.length) + (b == null ? 0 : b.length); }", "function() { return 0; }", "function(a,b) { return a + b; }", JavaScriptConfig.getEnabledInstance())).build(), new IncrementalIndexStorageAdapter(index));
        final List<Row> results = rows.toList();
        Assert.assertEquals(2, results.size());
        MapBasedRow row = (MapBasedRow) results.get(0);
        Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L, "fieldLength", 2.0), row.getEvent());
        row = (MapBasedRow) results.get(1);
        Assert.assertEquals(ImmutableMap.of("billy", "hip", "sally", "hop", "cnt", 1L, "fieldLength", 6.0), row.getEvent());
    }
}
Also used : GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) JavaScriptAggregatorFactory(org.apache.druid.query.aggregation.JavaScriptAggregatorFactory) CloseableStupidPool(org.apache.druid.collections.CloseableStupidPool) ByteBuffer(java.nio.ByteBuffer) MapBasedRow(org.apache.druid.data.input.MapBasedRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Row(org.apache.druid.data.input.Row) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) GroupByQueryEngine(org.apache.druid.query.groupby.GroupByQueryEngine) Interval(org.joda.time.Interval) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 7 with GroupByQueryEngine

use of org.apache.druid.query.groupby.GroupByQueryEngine in project druid by druid-io.

the class GroupByTypeInterfaceBenchmark method setup.

@Setup(Level.Trial)
public void setup() throws IOException {
    log.info("SETUP CALLED AT %d", System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    setupQueries();
    String schemaName = "basic";
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
    stringQuery = SCHEMA_QUERY_MAP.get(schemaName).get("string");
    longFloatQuery = SCHEMA_QUERY_MAP.get(schemaName).get("longFloat");
    longQuery = SCHEMA_QUERY_MAP.get(schemaName).get("long");
    floatQuery = SCHEMA_QUERY_MAP.get(schemaName).get("float");
    final DataGenerator dataGenerator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED + 1, schemaInfo.getDataInterval(), rowsPerSegment);
    tmpDir = FileUtils.createTempDir();
    log.info("Using temp dir: %s", tmpDir.getAbsolutePath());
    // queryableIndexes   -> numSegments worth of on-disk segments
    // anIncrementalIndex -> the last incremental index
    anIncrementalIndex = null;
    queryableIndexes = new ArrayList<>(numSegments);
    for (int i = 0; i < numSegments; i++) {
        log.info("Generating rows for segment %d/%d", i + 1, numSegments);
        final IncrementalIndex index = makeIncIndex();
        for (int j = 0; j < rowsPerSegment; j++) {
            final InputRow row = dataGenerator.nextRow();
            if (j % 20000 == 0) {
                log.info("%,d/%,d rows generated.", i * rowsPerSegment + j, rowsPerSegment * numSegments);
            }
            index.add(row);
        }
        log.info("%,d/%,d rows generated, persisting segment %d/%d.", (i + 1) * rowsPerSegment, rowsPerSegment * numSegments, i + 1, numSegments);
        final File file = INDEX_MERGER_V9.persist(index, new File(tmpDir, String.valueOf(i)), new IndexSpec(), null);
        queryableIndexes.add(INDEX_IO.loadIndex(file));
        if (i == numSegments - 1) {
            anIncrementalIndex = index;
        } else {
            index.close();
        }
    }
    NonBlockingPool<ByteBuffer> bufferPool = new StupidPool<>("GroupByBenchmark-computeBufferPool", new OffheapBufferGenerator("compute", 250_000_000), 0, Integer.MAX_VALUE);
    // limit of 2 is required since we simulate both historical merge and broker merge in the same process
    BlockingPool<ByteBuffer> mergePool = new DefaultBlockingPool<>(new OffheapBufferGenerator("merge", 250_000_000), 2);
    final GroupByQueryConfig config = new GroupByQueryConfig() {

        @Override
        public String getDefaultStrategy() {
            return defaultStrategy;
        }

        @Override
        public int getBufferGrouperInitialBuckets() {
            return initialBuckets;
        }

        @Override
        public long getMaxOnDiskStorage() {
            return 1_000_000_000L;
        }
    };
    config.setSingleThreaded(false);
    config.setMaxIntermediateRows(Integer.MAX_VALUE);
    config.setMaxResults(Integer.MAX_VALUE);
    DruidProcessingConfig druidProcessingConfig = new DruidProcessingConfig() {

        @Override
        public int getNumThreads() {
            // Used by "v2" strategy for concurrencyHint
            return numProcessingThreads;
        }

        @Override
        public String getFormatString() {
            return null;
        }
    };
    final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
    final GroupByStrategySelector strategySelector = new GroupByStrategySelector(configSupplier, new GroupByStrategyV1(configSupplier, new GroupByQueryEngine(configSupplier, bufferPool), QueryBenchmarkUtil.NOOP_QUERYWATCHER), new GroupByStrategyV2(druidProcessingConfig, configSupplier, bufferPool, mergePool, new ObjectMapper(new SmileFactory()), QueryBenchmarkUtil.NOOP_QUERYWATCHER));
    factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector));
}
Also used : GroupByStrategySelector(org.apache.druid.query.groupby.strategy.GroupByStrategySelector) IndexSpec(org.apache.druid.segment.IndexSpec) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) GroupByStrategyV1(org.apache.druid.query.groupby.strategy.GroupByStrategyV1) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) DefaultBlockingPool(org.apache.druid.collections.DefaultBlockingPool) GroupByQueryEngine(org.apache.druid.query.groupby.GroupByQueryEngine) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) GroupByQueryRunnerFactory(org.apache.druid.query.groupby.GroupByQueryRunnerFactory) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) ByteBuffer(java.nio.ByteBuffer) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) OffheapBufferGenerator(org.apache.druid.offheap.OffheapBufferGenerator) DataGenerator(org.apache.druid.segment.generator.DataGenerator) InputRow(org.apache.druid.data.input.InputRow) StupidPool(org.apache.druid.collections.StupidPool) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) File(java.io.File) Setup(org.openjdk.jmh.annotations.Setup)

Example 8 with GroupByQueryEngine

use of org.apache.druid.query.groupby.GroupByQueryEngine in project druid by druid-io.

the class CachingClusteredClientBenchmark method makeGroupByQueryRunnerFactory.

private static GroupByQueryRunnerFactory makeGroupByQueryRunnerFactory(final ObjectMapper mapper, final GroupByQueryConfig config, final DruidProcessingConfig processingConfig) {
    final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
    final Supplier<ByteBuffer> bufferSupplier = () -> ByteBuffer.allocateDirect(processingConfig.intermediateComputeSizeBytes());
    final NonBlockingPool<ByteBuffer> bufferPool = new StupidPool<>("GroupByQueryEngine-bufferPool", bufferSupplier);
    final BlockingPool<ByteBuffer> mergeBufferPool = new DefaultBlockingPool<>(bufferSupplier, processingConfig.getNumMergeBuffers());
    final GroupByStrategySelector strategySelector = new GroupByStrategySelector(configSupplier, new GroupByStrategyV1(configSupplier, new GroupByQueryEngine(configSupplier, bufferPool), QueryRunnerTestHelper.NOOP_QUERYWATCHER), new GroupByStrategyV2(processingConfig, configSupplier, bufferPool, mergeBufferPool, mapper, QueryRunnerTestHelper.NOOP_QUERYWATCHER));
    final GroupByQueryQueryToolChest toolChest = new GroupByQueryQueryToolChest(strategySelector);
    return new GroupByQueryRunnerFactory(strategySelector, toolChest);
}
Also used : GroupByStrategySelector(org.apache.druid.query.groupby.strategy.GroupByStrategySelector) GroupByQueryRunnerFactory(org.apache.druid.query.groupby.GroupByQueryRunnerFactory) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) ByteBuffer(java.nio.ByteBuffer) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) GroupByStrategyV1(org.apache.druid.query.groupby.strategy.GroupByStrategyV1) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) DefaultBlockingPool(org.apache.druid.collections.DefaultBlockingPool) StupidPool(org.apache.druid.collections.StupidPool) GroupByQueryEngine(org.apache.druid.query.groupby.GroupByQueryEngine)

Example 9 with GroupByQueryEngine

use of org.apache.druid.query.groupby.GroupByQueryEngine in project druid by druid-io.

the class GroupByBenchmark method setup.

/**
 * Setup everything common for benchmarking both the incremental-index and the queriable-index.
 */
@Setup(Level.Trial)
public void setup() {
    log.info("SETUP CALLED AT " + +System.currentTimeMillis());
    ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde());
    setupQueries();
    String[] schemaQuery = schemaAndQuery.split("\\.");
    String schemaName = schemaQuery[0];
    String queryName = schemaQuery[1];
    schemaInfo = GeneratorBasicSchemas.SCHEMA_MAP.get(schemaName);
    query = SCHEMA_QUERY_MAP.get(schemaName).get(queryName);
    generator = new DataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
    NonBlockingPool<ByteBuffer> bufferPool = new StupidPool<>("GroupByBenchmark-computeBufferPool", new OffheapBufferGenerator("compute", 250_000_000), 0, Integer.MAX_VALUE);
    // limit of 2 is required since we simulate both historical merge and broker merge in the same process
    BlockingPool<ByteBuffer> mergePool = new DefaultBlockingPool<>(new OffheapBufferGenerator("merge", 250_000_000), 2);
    final GroupByQueryConfig config = new GroupByQueryConfig() {

        @Override
        public String getDefaultStrategy() {
            return defaultStrategy;
        }

        @Override
        public int getBufferGrouperInitialBuckets() {
            return initialBuckets;
        }

        @Override
        public long getMaxOnDiskStorage() {
            return 1_000_000_000L;
        }
    };
    config.setSingleThreaded(false);
    config.setMaxIntermediateRows(Integer.MAX_VALUE);
    config.setMaxResults(Integer.MAX_VALUE);
    DruidProcessingConfig druidProcessingConfig = new DruidProcessingConfig() {

        @Override
        public int getNumThreads() {
            // Used by "v2" strategy for concurrencyHint
            return numProcessingThreads;
        }

        @Override
        public String getFormatString() {
            return null;
        }
    };
    final Supplier<GroupByQueryConfig> configSupplier = Suppliers.ofInstance(config);
    final GroupByStrategySelector strategySelector = new GroupByStrategySelector(configSupplier, new GroupByStrategyV1(configSupplier, new GroupByQueryEngine(configSupplier, bufferPool), QueryBenchmarkUtil.NOOP_QUERYWATCHER), new GroupByStrategyV2(druidProcessingConfig, configSupplier, bufferPool, mergePool, new ObjectMapper(new SmileFactory()), QueryBenchmarkUtil.NOOP_QUERYWATCHER));
    factory = new GroupByQueryRunnerFactory(strategySelector, new GroupByQueryQueryToolChest(strategySelector));
}
Also used : GroupByStrategySelector(org.apache.druid.query.groupby.strategy.GroupByStrategySelector) GroupByQueryRunnerFactory(org.apache.druid.query.groupby.GroupByQueryRunnerFactory) GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) HyperUniquesSerde(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesSerde) ByteBuffer(java.nio.ByteBuffer) GroupByQueryQueryToolChest(org.apache.druid.query.groupby.GroupByQueryQueryToolChest) SmileFactory(com.fasterxml.jackson.dataformat.smile.SmileFactory) OffheapBufferGenerator(org.apache.druid.offheap.OffheapBufferGenerator) GroupByStrategyV1(org.apache.druid.query.groupby.strategy.GroupByStrategyV1) GroupByStrategyV2(org.apache.druid.query.groupby.strategy.GroupByStrategyV2) DefaultBlockingPool(org.apache.druid.collections.DefaultBlockingPool) DataGenerator(org.apache.druid.segment.generator.DataGenerator) StupidPool(org.apache.druid.collections.StupidPool) DruidProcessingConfig(org.apache.druid.query.DruidProcessingConfig) GroupByQueryEngine(org.apache.druid.query.groupby.GroupByQueryEngine) DefaultObjectMapper(org.apache.druid.jackson.DefaultObjectMapper) ObjectMapper(com.fasterxml.jackson.databind.ObjectMapper) Setup(org.openjdk.jmh.annotations.Setup)

Example 10 with GroupByQueryEngine

use of org.apache.druid.query.groupby.GroupByQueryEngine in project druid by apache.

the class IncrementalIndexStorageAdapterTest method testFilterByNull.

@Test
public void testFilterByNull() throws Exception {
    IncrementalIndex index = indexCreator.createIndex();
    index.add(new MapBasedInputRow(System.currentTimeMillis() - 1, Collections.singletonList("billy"), ImmutableMap.of("billy", "hi")));
    index.add(new MapBasedInputRow(System.currentTimeMillis() - 1, Collections.singletonList("sally"), ImmutableMap.of("sally", "bo")));
    try (CloseableStupidPool<ByteBuffer> pool = new CloseableStupidPool<>("GroupByQueryEngine-bufferPool", () -> ByteBuffer.allocate(50000))) {
        final GroupByQueryEngine engine = new GroupByQueryEngine(Suppliers.ofInstance(new GroupByQueryConfig() {

            @Override
            public int getMaxIntermediateRows() {
                return 5;
            }
        }), pool);
        final Sequence<Row> rows = engine.process(GroupByQuery.builder().setDataSource("test").setGranularity(Granularities.ALL).setInterval(new Interval(DateTimes.EPOCH, DateTimes.nowUtc())).addDimension("billy").addDimension("sally").addAggregator(new LongSumAggregatorFactory("cnt", "cnt")).setDimFilter(DimFilters.dimEquals("sally", (String) null)).build(), new IncrementalIndexStorageAdapter(index));
        final List<Row> results = rows.toList();
        Assert.assertEquals(1, results.size());
        MapBasedRow row = (MapBasedRow) results.get(0);
        Assert.assertEquals(ImmutableMap.of("billy", "hi", "cnt", 1L), row.getEvent());
    }
}
Also used : GroupByQueryConfig(org.apache.druid.query.groupby.GroupByQueryConfig) LongSumAggregatorFactory(org.apache.druid.query.aggregation.LongSumAggregatorFactory) CloseableStupidPool(org.apache.druid.collections.CloseableStupidPool) ByteBuffer(java.nio.ByteBuffer) MapBasedRow(org.apache.druid.data.input.MapBasedRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) MapBasedRow(org.apache.druid.data.input.MapBasedRow) Row(org.apache.druid.data.input.Row) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) GroupByQueryEngine(org.apache.druid.query.groupby.GroupByQueryEngine) Interval(org.joda.time.Interval) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

ByteBuffer (java.nio.ByteBuffer)12 GroupByQueryConfig (org.apache.druid.query.groupby.GroupByQueryConfig)12 GroupByQueryEngine (org.apache.druid.query.groupby.GroupByQueryEngine)12 CloseableStupidPool (org.apache.druid.collections.CloseableStupidPool)6 DefaultBlockingPool (org.apache.druid.collections.DefaultBlockingPool)6 StupidPool (org.apache.druid.collections.StupidPool)6 MapBasedInputRow (org.apache.druid.data.input.MapBasedInputRow)6 MapBasedRow (org.apache.druid.data.input.MapBasedRow)6 Row (org.apache.druid.data.input.Row)6 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)6 GroupByQueryQueryToolChest (org.apache.druid.query.groupby.GroupByQueryQueryToolChest)6 GroupByQueryRunnerFactory (org.apache.druid.query.groupby.GroupByQueryRunnerFactory)6 GroupByStrategySelector (org.apache.druid.query.groupby.strategy.GroupByStrategySelector)6 GroupByStrategyV1 (org.apache.druid.query.groupby.strategy.GroupByStrategyV1)6 GroupByStrategyV2 (org.apache.druid.query.groupby.strategy.GroupByStrategyV2)6 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)6 Interval (org.joda.time.Interval)6 Test (org.junit.Test)6 ObjectMapper (com.fasterxml.jackson.databind.ObjectMapper)4 SmileFactory (com.fasterxml.jackson.dataformat.smile.SmileFactory)4