use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class QuantileSqlAggregatorTest method testQuantileOnFloatAndLongs.
@Test
public void testQuantileOnFloatAndLongs() throws Exception {
try (final DruidPlanner planner = plannerFactory.createPlanner(null)) {
final String sql = "SELECT\n" + "APPROX_QUANTILE(m1, 0.01),\n" + "APPROX_QUANTILE(m1, 0.5, 50),\n" + "APPROX_QUANTILE(m1, 0.98, 200),\n" + "APPROX_QUANTILE(m1, 0.99),\n" + "APPROX_QUANTILE(m1, 0.99) FILTER(WHERE dim1 = 'abc'),\n" + "APPROX_QUANTILE(m1, 0.999) FILTER(WHERE dim1 <> 'abc'),\n" + "APPROX_QUANTILE(m1, 0.999) FILTER(WHERE dim1 = 'abc'),\n" + "APPROX_QUANTILE(cnt, 0.5)\n" + "FROM foo";
final PlannerResult plannerResult = planner.plan(sql);
// Verify results
final List<Object[]> results = Sequences.toList(plannerResult.run(), new ArrayList<Object[]>());
final List<Object[]> expectedResults = ImmutableList.of(new Object[] { 1.0, 3.0, 5.880000114440918, 5.940000057220459, 6.0, 4.994999885559082, 6.0, 1.0 });
Assert.assertEquals(expectedResults.size(), results.size());
for (int i = 0; i < expectedResults.size(); i++) {
Assert.assertArrayEquals(expectedResults.get(i), results.get(i));
}
// Verify query
Assert.assertEquals(Druids.newTimeseriesQueryBuilder().dataSource(CalciteTests.DATASOURCE1).intervals(new MultipleIntervalSegmentSpec(ImmutableList.of(Filtration.eternity()))).granularity(Granularities.ALL).aggregators(ImmutableList.of(new ApproximateHistogramAggregatorFactory("a0:agg", "m1", null, null, null, null), new ApproximateHistogramAggregatorFactory("a2:agg", "m1", 200, null, null, null), new FilteredAggregatorFactory(new ApproximateHistogramAggregatorFactory("a4:agg", "m1", null, null, null, null), new SelectorDimFilter("dim1", "abc", null)), new FilteredAggregatorFactory(new ApproximateHistogramAggregatorFactory("a5:agg", "m1", null, null, null, null), new NotDimFilter(new SelectorDimFilter("dim1", "abc", null))), new ApproximateHistogramAggregatorFactory("a7:agg", "cnt", null, null, null, null))).postAggregators(ImmutableList.<PostAggregator>of(new QuantilePostAggregator("a0", "a0:agg", 0.01f), new QuantilePostAggregator("a1", "a0:agg", 0.50f), new QuantilePostAggregator("a2", "a2:agg", 0.98f), new QuantilePostAggregator("a3", "a0:agg", 0.99f), new QuantilePostAggregator("a4", "a4:agg", 0.99f), new QuantilePostAggregator("a5", "a5:agg", 0.999f), new QuantilePostAggregator("a6", "a4:agg", 0.999f), new QuantilePostAggregator("a7", "a7:agg", 0.50f))).context(ImmutableMap.<String, Object>of("skipEmptyBuckets", true)).build(), Iterables.getOnlyElement(queryLogHook.getRecordedQueries()));
}
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class FilteredAggregatorBenchmark method setup.
@Setup
public void setup() throws IOException {
log.info("SETUP CALLED AT " + System.currentTimeMillis());
if (ComplexMetrics.getSerdeForType("hyperUnique") == null) {
ComplexMetrics.registerSerde("hyperUnique", new HyperUniquesSerde(HyperLogLogHash.getDefault()));
}
schemaInfo = BenchmarkSchemas.SCHEMA_MAP.get(schema);
BenchmarkDataGenerator gen = new BenchmarkDataGenerator(schemaInfo.getColumnSchemas(), RNG_SEED, schemaInfo.getDataInterval(), rowsPerSegment);
incIndex = makeIncIndex(schemaInfo.getAggsArray());
filter = new OrDimFilter(Arrays.asList(new BoundDimFilter("dimSequential", "-1", "-1", true, true, null, null, StringComparators.ALPHANUMERIC), new JavaScriptDimFilter("dimSequential", "function(x) { return false }", null, JavaScriptConfig.getEnabledInstance()), new RegexDimFilter("dimSequential", "X", null), new SearchQueryDimFilter("dimSequential", new ContainsSearchQuerySpec("X", false), null), new InDimFilter("dimSequential", Arrays.asList("X"), null)));
filteredMetrics = new AggregatorFactory[1];
filteredMetrics[0] = new FilteredAggregatorFactory(new CountAggregatorFactory("rows"), filter);
incIndexFilteredAgg = makeIncIndex(filteredMetrics);
inputRows = new ArrayList<>();
for (int j = 0; j < rowsPerSegment; j++) {
InputRow row = gen.nextRow();
if (j % 10000 == 0) {
log.info(j + " rows generated.");
}
incIndex.add(row);
inputRows.add(row);
}
tmpDir = Files.createTempDir();
log.info("Using temp dir: " + tmpDir.getAbsolutePath());
indexFile = INDEX_MERGER_V9.persist(incIndex, tmpDir, new IndexSpec());
qIndex = INDEX_IO.loadIndex(indexFile);
factory = new TimeseriesQueryRunnerFactory(new TimeseriesQueryQueryToolChest(QueryBenchmarkUtil.NoopIntervalChunkingQueryRunnerDecorator()), new TimeseriesQueryEngine(), QueryBenchmarkUtil.NOOP_QUERYWATCHER);
BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(filteredMetrics[0]);
query = Druids.newTimeseriesQueryBuilder().dataSource("blah").granularity(Granularities.ALL).intervals(intervalSpec).aggregators(queryAggs).descending(false).build();
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class TopNTypeInterfaceBenchmark method setupQueries.
private void setupQueries() {
// queries for the basic schema
Map<String, TopNQueryBuilder> basicQueries = new LinkedHashMap<>();
BenchmarkSchemaInfo basicSchema = BenchmarkSchemas.SCHEMA_MAP.get("basic");
{
// basic.A
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
queryAggs.add(new LongMaxAggregatorFactory("maxLongUniform", "maxLongUniform"));
queryAggs.add(new DoubleSumAggregatorFactory("sumFloatNormal", "sumFloatNormal"));
queryAggs.add(new DoubleMinAggregatorFactory("minFloatZipf", "minFloatZipf"));
queryAggs.add(new HyperUniquesAggregatorFactory("hyperUniquesMet", "hyper"));
// Use an IdentityExtractionFn to force usage of DimExtractionTopNAlgorithm
TopNQueryBuilder queryBuilderString = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension(new ExtractionDimensionSpec("dimSequential", "dimSequential", IdentityExtractionFn.getInstance())).metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
// DimExtractionTopNAlgorithm is always used for numeric columns
TopNQueryBuilder queryBuilderLong = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metLongUniform").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
TopNQueryBuilder queryBuilderFloat = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("metFloatNormal").metric("sumFloatNormal").intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("string", queryBuilderString);
basicQueries.put("long", queryBuilderLong);
basicQueries.put("float", queryBuilderFloat);
}
{
// basic.numericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.NUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("numericSort", queryBuilderA);
}
{
// basic.alphanumericSort
QuerySegmentSpec intervalSpec = new MultipleIntervalSegmentSpec(Arrays.asList(basicSchema.getDataInterval()));
List<AggregatorFactory> queryAggs = new ArrayList<>();
queryAggs.add(new LongSumAggregatorFactory("sumLongSequential", "sumLongSequential"));
TopNQueryBuilder queryBuilderA = new TopNQueryBuilder().dataSource("blah").granularity(Granularities.ALL).dimension("dimUniform").metric(new DimensionTopNMetricSpec(null, StringComparators.ALPHANUMERIC)).intervals(intervalSpec).aggregators(queryAggs);
basicQueries.put("alphanumericSort", queryBuilderA);
}
SCHEMA_QUERY_MAP.put("basic", basicQueries);
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testPostAggMergedHavingSpec.
@Test
public void testPostAggMergedHavingSpec() {
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 6L, "index", 4420L, QueryRunnerTestHelper.addRowsIndexConstantMetric, (double) (6L + 4420L + 1L)), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 6L, "index", 4416L, QueryRunnerTestHelper.addRowsIndexConstantMetric, (double) (6L + 4416L + 1L)));
GroupByQuery.Builder builder = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setInterval("2011-04-02/2011-04-04").setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("index", "index"))).setPostAggregatorSpecs(ImmutableList.<PostAggregator>of(QueryRunnerTestHelper.addRowsIndexConstant)).setGranularity(new PeriodGranularity(new Period("P1M"), null, null)).setHavingSpec(new OrHavingSpec(ImmutableList.<HavingSpec>of(new GreaterThanHavingSpec(QueryRunnerTestHelper.addRowsIndexConstantMetric, 1000L))));
final GroupByQuery fullQuery = builder.build();
QueryRunner mergedRunner = factory.getToolchest().mergeResults(new QueryRunner<Row>() {
@Override
public Sequence<Row> run(Query<Row> query, Map<String, Object> responseContext) {
// simulate two daily segments
final Query query1 = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-02/2011-04-03"))));
final Query query2 = query.withQuerySegmentSpec(new MultipleIntervalSegmentSpec(Lists.newArrayList(new Interval("2011-04-03/2011-04-04"))));
return new MergeSequence(query.getResultOrdering(), Sequences.simple(Arrays.asList(runner.run(query1, responseContext), runner.run(query2, responseContext))));
}
});
Map<String, Object> context = Maps.newHashMap();
TestHelper.assertExpectedObjects(expectedResults, mergedRunner.run(fullQuery, context), "merged");
}
use of io.druid.query.spec.MultipleIntervalSegmentSpec in project druid by druid-io.
the class GroupByQueryRunnerTest method testSubqueryWithMultipleIntervalsInOuterQueryAndChunkPeriod.
@Test
public void testSubqueryWithMultipleIntervalsInOuterQueryAndChunkPeriod() {
GroupByQuery subquery = GroupByQuery.builder().setDataSource(QueryRunnerTestHelper.dataSource).setQuerySegmentSpec(QueryRunnerTestHelper.firstToThird).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("quality", "alias"))).setDimFilter(new JavaScriptDimFilter("quality", "function(dim){ return true; }", null, JavaScriptConfig.getEnabledInstance())).setAggregatorSpecs(Arrays.asList(QueryRunnerTestHelper.rowsCount, new LongSumAggregatorFactory("idx", "index"), new LongSumAggregatorFactory("indexMaxPlusTen", "indexMaxPlusTen"))).setGranularity(QueryRunnerTestHelper.dayGran).setContext(ImmutableMap.<String, Object>of("chunkPeriod", "P1D")).build();
GroupByQuery query = GroupByQuery.builder().setDataSource(subquery).setQuerySegmentSpec(new MultipleIntervalSegmentSpec(ImmutableList.of(new Interval("2011-04-01T00:00:00.000Z/2011-04-01T23:58:00.000Z"), new Interval("2011-04-02T00:00:00.000Z/2011-04-03T00:00:00.000Z")))).setDimensions(Lists.<DimensionSpec>newArrayList(new DefaultDimensionSpec("alias", "alias"))).setAggregatorSpecs(Arrays.<AggregatorFactory>asList(new LongSumAggregatorFactory("rows", "rows"), new LongSumAggregatorFactory("idx", "idx"))).setGranularity(QueryRunnerTestHelper.dayGran).build();
List<Row> expectedResults = Arrays.asList(GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "automotive", "rows", 1L, "idx", 135L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "business", "rows", 1L, "idx", 118L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "entertainment", "rows", 1L, "idx", 158L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "health", "rows", 1L, "idx", 120L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "mezzanine", "rows", 3L, "idx", 2870L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "news", "rows", 1L, "idx", 121L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "premium", "rows", 3L, "idx", 2900L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "technology", "rows", 1L, "idx", 78L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-01", "alias", "travel", "rows", 1L, "idx", 119L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "automotive", "rows", 1L, "idx", 147L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "business", "rows", 1L, "idx", 112L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "entertainment", "rows", 1L, "idx", 166L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "health", "rows", 1L, "idx", 113L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "mezzanine", "rows", 3L, "idx", 2447L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "news", "rows", 1L, "idx", 114L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "premium", "rows", 3L, "idx", 2505L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "technology", "rows", 1L, "idx", 97L), GroupByQueryRunnerTestHelper.createExpectedRow("2011-04-02", "alias", "travel", "rows", 1L, "idx", 126L));
// Subqueries are handled by the ToolChest
Iterable<Row> results = GroupByQueryRunnerTestHelper.runQuery(factory, runner, query);
TestHelper.assertExpectedObjects(expectedResults, results, "");
}
Aggregations