
Example 51 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

From the class Expressions, method toPostAggregator.

/**
   * Translate a Calcite row-expression to a Druid PostAggregator. One day, when possible, this could be folded
   * into {@link #toRowExtraction(DruidOperatorTable, PlannerContext, List, RexNode)}.
   *
   * @param name                              name of the PostAggregator
   * @param rowOrder                          order of fields in the Druid rows to be extracted from
   * @param finalizingPostAggregatorFactories post-aggregators that should be used for specific entries in rowOrder.
   *                                          May be empty, and individual values may be null. Missing or null values
   *                                          will lead to creation of {@link FieldAccessPostAggregator}.
   * @param expression                        expression meant to be applied on top of the rows
   *
   * @return PostAggregator or null if not possible
   */
public static PostAggregator toPostAggregator(
        final String name,
        final List<String> rowOrder,
        final List<PostAggregatorFactory> finalizingPostAggregatorFactories,
        final RexNode expression) {
    final PostAggregator retVal;
    if (expression.getKind() == SqlKind.INPUT_REF) {
        final RexInputRef ref = (RexInputRef) expression;
        final PostAggregatorFactory finalizingPostAggregatorFactory = finalizingPostAggregatorFactories.get(ref.getIndex());
        retVal = finalizingPostAggregatorFactory != null ? finalizingPostAggregatorFactory.factorize(name) : new FieldAccessPostAggregator(name, rowOrder.get(ref.getIndex()));
    } else if (expression.getKind() == SqlKind.CAST) {
        // Ignore CAST when translating to PostAggregators and hope for the best. They are really loosey-goosey with
        // types internally and there isn't much we can do to respect them.
        // TODO(gianm): Probably not a good idea to ignore CAST like this.
        final RexNode operand = ((RexCall) expression).getOperands().get(0);
        retVal = toPostAggregator(name, rowOrder, finalizingPostAggregatorFactories, operand);
    } else if (expression.getKind() == SqlKind.LITERAL && SqlTypeName.NUMERIC_TYPES.contains(expression.getType().getSqlTypeName())) {
        retVal = new ConstantPostAggregator(name, (Number) RexLiteral.value(expression));
    } else if (expression.getKind() == SqlKind.TIMES || expression.getKind() == SqlKind.DIVIDE || expression.getKind() == SqlKind.PLUS || expression.getKind() == SqlKind.MINUS) {
        final String fnName = ImmutableMap.<SqlKind, String>builder()
                .put(SqlKind.TIMES, "*")
                .put(SqlKind.DIVIDE, "quotient")
                .put(SqlKind.PLUS, "+")
                .put(SqlKind.MINUS, "-")
                .build()
                .get(expression.getKind());
        final List<PostAggregator> operands = Lists.newArrayList();
        for (RexNode operand : ((RexCall) expression).getOperands()) {
            final PostAggregator translatedOperand = toPostAggregator(null, rowOrder, finalizingPostAggregatorFactories, operand);
            if (translatedOperand == null) {
                return null;
            }
            operands.add(translatedOperand);
        }
        retVal = new ArithmeticPostAggregator(name, fnName, operands);
    } else {
        // Try converting to a math expression.
        final String mathExpression = Expressions.toMathExpression(rowOrder, expression);
        if (mathExpression == null) {
            retVal = null;
        } else {
            retVal = new ExpressionPostAggregator(name, mathExpression);
        }
    }
    if (retVal != null && name != null && !name.equals(retVal.getName())) {
        throw new ISE("WTF?! Was about to return a PostAggregator with bad name, [%s] != [%s]", name, retVal.getName());
    }
    return retVal;
}
Also used : ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ExpressionPostAggregator(io.druid.query.aggregation.post.ExpressionPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) PostAggregator(io.druid.query.aggregation.PostAggregator) PostAggregatorFactory(io.druid.sql.calcite.aggregation.PostAggregatorFactory) RexCall(org.apache.calcite.rex.RexCall) RexInputRef(org.apache.calcite.rex.RexInputRef) List(java.util.List) ISE(io.druid.java.util.common.ISE) RexNode(org.apache.calcite.rex.RexNode)
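
A minimal hand-built sketch (not from the source) of the tree the arithmetic branch of toPostAggregator produces for a simple product expression such as "m1 * 2"; the output name "x", the column "m1", and the constant 2 are assumptions for illustration:

// Illustrative only: the PostAggregator toPostAggregator would return for "m1 * 2",
// assuming rowOrder = ["m1"] and no finalizing factory for that column. Operands are
// translated with name == null, so only the outer node carries the requested name.
PostAggregator product = new ArithmeticPostAggregator(
        "x",    // requested output name; must match getName() or the ISE check fires
        "*",    // fnName resolved from SqlKind.TIMES
        Lists.<PostAggregator>newArrayList(
                new FieldAccessPostAggregator(null, "m1"),   // INPUT_REF branch
                new ConstantPostAggregator(null, 2)          // numeric LITERAL branch
        )
);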

Example 52 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

From the class SchemalessTestFullTest, method testFullOnTimeseries.

private void testFullOnTimeseries(QueryRunner runner, List<Result<TimeseriesResultValue>> expectedResults, String failMsg) {
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
            .dataSource(dataSource)
            .granularity(allGran)
            .intervals(fullOnInterval)
            .aggregators(Lists.<AggregatorFactory>newArrayList(Iterables.concat(
                    commonAggregators,
                    Lists.newArrayList(
                            new DoubleMaxAggregatorFactory("maxIndex", "index"),
                            new DoubleMinAggregatorFactory("minIndex", "index")))))
            .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant))
            .build();
    failMsg += " timeseries ";
    HashMap<String, Object> context = new HashMap<>();
    Iterable<Result<TimeseriesResultValue>> actualResults = Sequences.toList(runner.run(query, context), Lists.<Result<TimeseriesResultValue>>newArrayList());
    TestHelper.assertExpectedResults(expectedResults, actualResults, failMsg);
}
Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) Result(io.druid.query.Result)
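
The addRowsIndexConstant post-aggregator referenced above is defined in the shared test fixtures rather than in this method. As a sketch, assuming the usual Druid test-helper shape (rows + index + a constant 1), it would look roughly like:

// Assumed shape of the addRowsIndexConstant fixture (defined elsewhere in the test class);
// shown only to make the postAggregators(...) call above concrete.
PostAggregator addRowsIndexConstant = new ArithmeticPostAggregator(
        "addRowsIndexConstant",
        "+",
        Lists.<PostAggregator>newArrayList(
                new ConstantPostAggregator("const", 1L),
                new FieldAccessPostAggregator("rows", "rows"),
                new FieldAccessPostAggregator("index", "index")
        )
);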

Example 53 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

From the class SchemalessTestFullTest, method testFilteredTimeseries.

private void testFilteredTimeseries(QueryRunner runner, List<Result<TimeseriesResultValue>> expectedResults, String failMsg) {
    TimeseriesQuery query = Druids.newTimeseriesQueryBuilder()
            .dataSource(dataSource)
            .granularity(allGran)
            .intervals(fullOnInterval)
            .filters(marketDimension, "spot")
            .aggregators(Lists.<AggregatorFactory>newArrayList(Iterables.concat(
                    commonAggregators,
                    Lists.newArrayList(
                            new DoubleMaxAggregatorFactory("maxIndex", "index"),
                            new DoubleMinAggregatorFactory("minIndex", "index")))))
            .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant))
            .build();
    failMsg += " filtered timeseries ";
    HashMap<String, Object> context = new HashMap<>();
    Iterable<Result<TimeseriesResultValue>> actualResults = Sequences.toList(runner.run(query, context), Lists.<Result<TimeseriesResultValue>>newArrayList());
    TestHelper.assertExpectedResults(expectedResults, actualResults, failMsg);
}
Also used : TimeseriesResultValue(io.druid.query.timeseries.TimeseriesResultValue) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) TimeseriesQuery(io.druid.query.timeseries.TimeseriesQuery) PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) HashMap(java.util.HashMap) LinkedHashMap(java.util.LinkedHashMap) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) Result(io.druid.query.Result)
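
The only difference from the previous example is the filters(marketDimension, "spot") call, which restricts the timeseries to rows whose market dimension equals "spot". A hedged equivalent using an explicit filter object (assuming the three-argument io.druid.query.filter.SelectorDimFilter constructor taking dimension, value, and extraction function) would be:

// Assumed explicit form of the .filters(marketDimension, "spot") shorthand above.
DimFilter spotFilter = new SelectorDimFilter(marketDimension, "spot", null);
// ...which could then be passed to the builder as .filters(spotFilter).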

Example 54 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project druid by druid-io.

From the class SchemalessTestSimpleTest, method testFullOnTopN.

//  @Test TODO: Handling of null values is inconsistent right now, need to make it all consistent and re-enable test
// TODO: Complain to Eric when you see this.  It shouldn't be like this...
public void testFullOnTopN() {
    TopNQuery query = new TopNQueryBuilder()
            .dataSource(dataSource)
            .granularity(allGran)
            .dimension(marketDimension)
            .metric(indexMetric)
            .threshold(3)
            .intervals(fullOnInterval)
            .aggregators(Lists.<AggregatorFactory>newArrayList(Iterables.concat(
                    commonAggregators,
                    Lists.newArrayList(
                            new DoubleMaxAggregatorFactory("maxIndex", "index"),
                            new DoubleMinAggregatorFactory("minIndex", "index")))))
            .postAggregators(Arrays.<PostAggregator>asList(addRowsIndexConstant))
            .build();
    List<Result<TopNResultValue>> expectedResults = Arrays.asList(
            new Result<TopNResultValue>(
                    new DateTime("2011-01-12T00:00:00.000Z"),
                    new TopNResultValue(Arrays.<DimensionAndMetricValueExtractor>asList(
                            new DimensionAndMetricValueExtractor(ImmutableMap.<String, Object>builder()
                                    .put("market", "spot")
                                    .put("rows", 4L)
                                    .put("index", 400.0D)
                                    .put("addRowsIndexConstant", 405.0D)
                                    .put("uniques", 1.0002442201269182D)
                                    .put("maxIndex", 100.0)
                                    .put("minIndex", 100.0)
                                    .build()),
                            new DimensionAndMetricValueExtractor(ImmutableMap.<String, Object>builder()
                                    .put("market", "")
                                    .put("rows", 2L)
                                    .put("index", 200.0D)
                                    .put("addRowsIndexConstant", 203.0D)
                                    .put("uniques", 0.0)
                                    .put("maxIndex", 100.0D)
                                    .put("minIndex", 100.0D)
                                    .build()),
                            new DimensionAndMetricValueExtractor(ImmutableMap.<String, Object>builder()
                                    .put("market", "total_market")
                                    .put("rows", 2L)
                                    .put("index", 200.0D)
                                    .put("addRowsIndexConstant", 203.0D)
                                    .put("uniques", 1.0002442201269182D)
                                    .put("maxIndex", 100.0D)
                                    .put("minIndex", 100.0D)
                                    .build())))));
    QueryRunner runner = TestQueryRunners.makeTopNQueryRunner(segment);
    HashMap<String, Object> context = new HashMap<String, Object>();
    TestHelper.assertExpectedResults(expectedResults, runner.run(query, context));
}
Also used : TopNQueryBuilder(io.druid.query.topn.TopNQueryBuilder) TopNResultValue(io.druid.query.topn.TopNResultValue) DoubleMaxAggregatorFactory(io.druid.query.aggregation.DoubleMaxAggregatorFactory) PostAggregator(io.druid.query.aggregation.PostAggregator) FieldAccessPostAggregator(io.druid.query.aggregation.post.FieldAccessPostAggregator) ArithmeticPostAggregator(io.druid.query.aggregation.post.ArithmeticPostAggregator) ConstantPostAggregator(io.druid.query.aggregation.post.ConstantPostAggregator) HashMap(java.util.HashMap) DoubleMinAggregatorFactory(io.druid.query.aggregation.DoubleMinAggregatorFactory) DateTime(org.joda.time.DateTime) QueryRunner(io.druid.query.QueryRunner) Result(io.druid.query.Result) TopNQuery(io.druid.query.topn.TopNQuery) DimensionAndMetricValueExtractor(io.druid.query.topn.DimensionAndMetricValueExtractor)
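
The addRowsIndexConstant values in the expected rows are consistent with rows + index + 1 (for the "spot" row, 4 + 400 + 1 = 405.0). A small sanity sketch, assuming the fixture shape outlined after Example 52, shows how the post-aggregator would compute that value from the combined aggregator results:

// Sanity sketch: evaluate the assumed addRowsIndexConstant over the "spot" row's aggregates.
Map<String, Object> spotRow = ImmutableMap.<String, Object>of("rows", 4L, "index", 400.0D);
Object addRowsIndexConstantValue = addRowsIndexConstant.compute(spotRow);   // 405.0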

Example 55 with PostAggregator

Use of io.druid.query.aggregation.PostAggregator in project hive by apache.

From the class DruidSerDe, method inferSchema.

/* GroupBy query */
private void inferSchema(GroupByQuery query, List<String> columnNames, List<PrimitiveTypeInfo> columnTypes) {
    // Timestamp column
    columnNames.add(DruidTable.DEFAULT_TIMESTAMP_COLUMN);
    columnTypes.add(TypeInfoFactory.timestampTypeInfo);
    // Dimension columns
    for (DimensionSpec ds : query.getDimensions()) {
        columnNames.add(ds.getOutputName());
        columnTypes.add(TypeInfoFactory.stringTypeInfo);
    }
    // Aggregator columns
    for (AggregatorFactory af : query.getAggregatorSpecs()) {
        columnNames.add(af.getName());
        columnTypes.add(DruidSerDeUtils.convertDruidToHiveType(af.getTypeName()));
    }
    // Post-aggregator columns; Hive currently maps all post-aggregation results to float
    for (PostAggregator pa : query.getPostAggregatorSpecs()) {
        columnNames.add(pa.getName());
        columnTypes.add(TypeInfoFactory.floatTypeInfo);
    }
}
Also used : DimensionSpec(io.druid.query.dimension.DimensionSpec) PostAggregator(io.druid.query.aggregation.PostAggregator) AggregatorFactory(io.druid.query.aggregation.AggregatorFactory)
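
A sketch of the column layout this method produces, for a hypothetical GroupBy query with one dimension ("country"), one long-typed aggregator ("cnt"), and one post-aggregator ("avg_price"); the names and the long-to-Hive mapping are assumptions for illustration, only the ordering (timestamp, dimensions, aggregators, post-aggregators) comes from the code above:

// Hypothetical output of inferSchema(...) for the query described above.
List<String> columnNames = Lists.newArrayList(
        DruidTable.DEFAULT_TIMESTAMP_COLUMN,  // timestamp column, always first
        "country",                            // dimension -> string
        "cnt",                                // aggregator -> type via DruidSerDeUtils.convertDruidToHiveType
        "avg_price"                           // post-aggregator -> float
);
List<PrimitiveTypeInfo> columnTypes = Lists.newArrayList(
        TypeInfoFactory.timestampTypeInfo,
        TypeInfoFactory.stringTypeInfo,
        TypeInfoFactory.longTypeInfo,         // assumed mapping for a long-typed aggregator
        TypeInfoFactory.floatTypeInfo
);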

Aggregations

PostAggregator (io.druid.query.aggregation.PostAggregator): 98 uses
Test (org.junit.Test): 72 uses
Result (io.druid.query.Result): 51 uses
DateTime (org.joda.time.DateTime): 47 uses
HyperUniqueFinalizingPostAggregator (io.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator): 43 uses
ExtractionDimensionSpec (io.druid.query.dimension.ExtractionDimensionSpec): 27 uses
DoubleMaxAggregatorFactory (io.druid.query.aggregation.DoubleMaxAggregatorFactory): 26 uses
DoubleMinAggregatorFactory (io.druid.query.aggregation.DoubleMinAggregatorFactory): 26 uses
ArithmeticPostAggregator (io.druid.query.aggregation.post.ArithmeticPostAggregator): 25 uses
AggregatorFactory (io.druid.query.aggregation.AggregatorFactory): 24 uses
FieldAccessPostAggregator (io.druid.query.aggregation.post.FieldAccessPostAggregator): 24 uses
ConstantPostAggregator (io.druid.query.aggregation.post.ConstantPostAggregator): 23 uses
DefaultDimensionSpec (io.druid.query.dimension.DefaultDimensionSpec): 23 uses
HashMap (java.util.HashMap): 23 uses
Row (io.druid.data.input.Row): 15 uses
RegexDimExtractionFn (io.druid.query.extraction.RegexDimExtractionFn): 14 uses
LookupExtractionFn (io.druid.query.lookup.LookupExtractionFn): 13 uses
DimensionSpec (io.druid.query.dimension.DimensionSpec): 12 uses
CountAggregator (io.druid.query.aggregation.CountAggregator): 10 uses
ExpressionPostAggregator (io.druid.query.aggregation.post.ExpressionPostAggregator): 10 uses