Search in sources :

Example 31 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class ExpressionSelectors method makeExprEvalSelector.

/**
 * Creates a {@link ColumnValueSelector} producing {@link ExprEval} values for the expression described by
 * {@code plan}, picking the most specialized selector that the plan's traits permit.
 *
 * Selection order:
 * <ol>
 *   <li>a plan with {@link ExpressionPlan.Trait#SINGLE_INPUT_SCALAR} whose single input is LONG or STRING gets a
 *   single-input caching selector;</li>
 *   <li>bindings equal to {@code InputBindings.nilBindings()} yield a constant selector that evaluates the
 *   expression once, up front;</li>
 *   <li>a plan with unknown or incomplete inputs gets a row-based selector;</li>
 *   <li>anything else gets the generic selector over the plan's applied expression.</li>
 * </ol>
 *
 * @param columnSelectorFactory factory used to build the underlying column/dimension selectors and bindings
 * @param plan                  analyzed expression plan
 *
 * @return a selector that evaluates the planned expression
 */
public static ColumnValueSelector<ExprEval> makeExprEvalSelector(ColumnSelectorFactory columnSelectorFactory, ExpressionPlan plan) {
    if (plan.is(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR)) {
        final String inputName = plan.getSingleInputName();
        final ColumnType singleType = plan.getSingleInputType();
        if (singleType.is(ValueType.LONG)) {
            // __time doesn't need an LRU cache since it is sorted.
            final boolean useLruCache = !ColumnHolder.TIME_COLUMN_NAME.equals(inputName);
            return new SingleLongInputCachingExpressionColumnValueSelector(columnSelectorFactory.makeColumnValueSelector(inputName), plan.getExpression(), useLruCache);
        }
        if (singleType.is(ValueType.STRING)) {
            final DefaultDimensionSpec dimensionSpec = new DefaultDimensionSpec(inputName, inputName, ColumnType.STRING);
            return new SingleStringInputCachingExpressionColumnValueSelector(columnSelectorFactory.makeDimensionSelector(dimensionSpec), plan.getExpression());
        }
        // any other single input type falls through to the general handling below
    }

    final Expr.ObjectBinding objectBinding = createBindings(columnSelectorFactory, plan);

    // Optimization for constant expressions: nothing to bind, so evaluate once
    final boolean nothingToBind = objectBinding.equals(InputBindings.nilBindings());
    if (nothingToBind) {
        return new ConstantExprEvalSelector(plan.getExpression().eval(objectBinding));
    }

    // if any input types are unknown or incomplete, fall back to a selector that works from the plan and
    // bindings on a per row basis
    final boolean inputsNotFullyKnown = plan.any(ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS);
    if (inputsNotFullyKnown) {
        return new RowBasedExpressionColumnValueSelector(plan, objectBinding);
    }

    // generic expression value selector for fully known input types
    return new ExpressionColumnValueSelector(plan.getAppliedExpression(), objectBinding);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) Expr(org.apache.druid.math.expr.Expr) ConstantExprEvalSelector(org.apache.druid.segment.ConstantExprEvalSelector) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec)

Example 32 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class ExpressionPlanner method plan.

/**
 * Druid tries to be chill to expressions to make up for not having a well defined table schema across segments. This
 * method performs some analysis to determine what sort of selectors can be constructed on top of an expression,
 * whether or not the expression will need to be implicitly mapped across multi-valued inputs, if the expression
 * produces multi-valued outputs, is vectorizable, and everything else interesting when making a selector.
 *
 * Results are stored in a {@link ExpressionPlan}, which can be examined to do whatever is necessary to make things
 * function properly.
 *
 * @param inspector  source of {@link ColumnCapabilities} for the columns the expression reads; also passed to the
 *                   expression when computing its output type and vectorizability
 * @param expression the parsed expression to analyze
 *
 * @return a plan carrying the computed traits, the output type (null when it was not computed), the single input
 *         type (null unless a single-input trait was considered), the union of inputs with no capabilities and
 *         inputs that may be multi-valued, and the list of inputs that need implicit mapping
 */
public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) {
    final Expr.BindingAnalysis analysis = expression.analyzeInputs();
    Parser.validateExpr(expression, analysis);
    EnumSet<ExpressionPlan.Trait> traits = EnumSet.noneOf(ExpressionPlan.Trait.class);
    // inputs for which the inspector returned no capabilities
    Set<String> noCapabilities = new HashSet<>();
    // string inputs that might be multi-valued and are not used as arrays by the expression
    Set<String> maybeMultiValued = new HashSet<>();
    // inputs that are definitely multi-valued and must be implicitly mapped; stays empty unless computed below
    List<String> needsApplied = ImmutableList.of();
    ColumnType singleInputType = null;
    ExpressionType outputType = null;
    final Set<String> columns = analysis.getRequiredBindings();
    // check and set traits which allow optimized selectors to be created
    if (columns.isEmpty()) {
        traits.add(ExpressionPlan.Trait.CONSTANT);
    } else if (expression.isIdentifier()) {
        traits.add(ExpressionPlan.Trait.IDENTIFIER);
    } else if (columns.size() == 1) {
        final String column = Iterables.getOnlyElement(columns);
        final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
        // Single-input traits are only considered when the column's capabilities are known and the expression is
        // not treating its input as an array and not wanting to output an array.
        // SINGLE_INPUT_SCALAR: the column is known to not have multiple values, and for strings it must also be
        // dictionary encoded.
        // SINGLE_INPUT_MAPPABLE: a dictionary encoded string column for which it is known whether or not it has
        // multiple values.
        if (capabilities != null && !analysis.hasInputArrays() && !analysis.isOutputArray()) {
            boolean isSingleInputMappable = false;
            boolean isSingleInputScalar = capabilities.hasMultipleValues().isFalse();
            if (capabilities.is(ValueType.STRING)) {
                isSingleInputScalar &= capabilities.isDictionaryEncoded().isTrue();
                isSingleInputMappable = capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isUnknown();
            }
            // if satisfied, set single input output type and flags
            if (isSingleInputScalar || isSingleInputMappable) {
                singleInputType = capabilities.toColumnType();
                if (isSingleInputScalar) {
                    traits.add(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
                }
                if (isSingleInputMappable) {
                    traits.add(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
                }
            }
        }
    }
    // automatic transformation to map across multi-valued inputs (or row by row detection in the worst case);
    // skipped when the expression is a constant, a plain identifier, or a single scalar input
    if (ExpressionPlan.none(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.CONSTANT, ExpressionPlan.Trait.IDENTIFIER)) {
        final Set<String> definitelyMultiValued = new HashSet<>();
        final Set<String> definitelyArray = new HashSet<>();
        // classify every required input by what its capabilities say about arrays and multiple values
        for (String column : analysis.getRequiredBindings()) {
            final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
            if (capabilities != null) {
                if (capabilities.isArray()) {
                    definitelyArray.add(column);
                } else if (capabilities.is(ValueType.STRING) && capabilities.hasMultipleValues().isTrue()) {
                    definitelyMultiValued.add(column);
                } else if (capabilities.is(ValueType.STRING) && capabilities.hasMultipleValues().isMaybeTrue() && !analysis.getArrayBindings().contains(column)) {
                    maybeMultiValued.add(column);
                }
            } else {
                noCapabilities.add(column);
            }
        }
        // find any inputs which will need to be implicitly mapped across multi-valued rows: definitely
        // multi-valued, not an array column, and not already used as an array by the expression
        needsApplied = columns.stream().filter(c -> !definitelyArray.contains(c) && definitelyMultiValued.contains(c) && !analysis.getArrayBindings().contains(c)).collect(Collectors.toList());
        // if the expression uses any inputs as arrays, set flag for non-scalar inputs
        if (analysis.hasInputArrays()) {
            traits.add(ExpressionPlan.Trait.NON_SCALAR_INPUTS);
        }
        if (!noCapabilities.isEmpty()) {
            traits.add(ExpressionPlan.Trait.UNKNOWN_INPUTS);
        }
        if (!maybeMultiValued.isEmpty()) {
            traits.add(ExpressionPlan.Trait.INCOMPLETE_INPUTS);
        }
        // if the expression needs to be transformed, flag it
        if (!needsApplied.isEmpty()) {
            traits.add(ExpressionPlan.Trait.NEEDS_APPLIED);
        }
    }
    // only set output type if we are pretty confident about input types
    final boolean shouldComputeOutput = ExpressionPlan.none(traits, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS);
    if (shouldComputeOutput) {
        outputType = expression.getOutputType(inspector);
    }
    // if analysis predicts output, or inferred output type, is array, output will be arrays
    if (analysis.isOutputArray() || (outputType != null && outputType.isArray())) {
        traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
        // single input mappable may not produce array output explicitly, only through implicit mapping
        traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
        traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
    }
    // vectorized expressions do not support incomplete, multi-valued inputs or outputs, or implicit mapping
    // they also do not support unknown inputs, but they also do not currently have to deal with them, as missing
    // capabilities is indicative of a non-existent column instead of an unknown schema. If this ever changes,
    // this check should also change
    boolean supportsVector = ExpressionPlan.none(traits, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
    if (supportsVector && expression.canVectorize(inspector)) {
        // make sure to compute the output type for a vector expression though, because we might have skipped it earlier
        // due to unknown inputs, but that's ok here since it just means it doesn't exist
        outputType = expression.getOutputType(inspector);
        traits.add(ExpressionPlan.Trait.VECTORIZABLE);
    }
    return new ExpressionPlan(inspector, expression, analysis, traits, outputType, singleInputType, Sets.union(noCapabilities, maybeMultiValued), needsApplied);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Expr(org.apache.druid.math.expr.Expr) ExpressionType(org.apache.druid.math.expr.ExpressionType) HashSet(java.util.HashSet)

Example 33 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class FinalizingFieldAccessPostAggregator method decorate.

/**
 * Returns a new {@link FinalizingFieldAccessPostAggregator} with the same name and field name, wired to the
 * aggregator registered under {@code fieldName} when one is present in {@code aggregators}.
 *
 * When the map contains the field, the comparator, finalizer and result type all come from that aggregator.
 * Otherwise (null map or missing key) the result uses a natural nulls-first comparator, an identity finalizer and
 * a null finalized type.
 *
 * @param aggregators aggregator factories keyed by name; may be null
 *
 * @return the decorated post-aggregator
 */
@Override
public FinalizingFieldAccessPostAggregator decorate(final Map<String, AggregatorFactory> aggregators) {
    final Comparator<Object> ordering;
    final Function<Object, Object> finalizer;
    final ColumnType resultType;
    if (aggregators == null || !aggregators.containsKey(fieldName)) {
        // no backing aggregator known: pass values through unchanged
        // noinspection unchecked
        ordering = (Comparator) Comparators.naturalNullsFirst();
        finalizer = Function.identity();
        resultType = null;
    } else {
        final AggregatorFactory factory = aggregators.get(fieldName);
        // noinspection unchecked
        ordering = factory.getComparator();
        finalizer = factory::finalizeComputation;
        resultType = factory.getResultType();
    }
    return new FinalizingFieldAccessPostAggregator(name, fieldName, resultType, ordering, finalizer);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType)

Example 34 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class Projection method postAggregatorComplexDirectColumnIsOk.

/**
 * Decides whether a post-aggregation "expression" can be realized as a direct field access. That is the case
 * when it is a direct column access, the column's type in the aggregate signature is COMPLEX, and that type
 * equals the type derived from the RexNode (so no implicit cast is needed).
 *
 * @param aggregateRowSignature signature of the aggregation
 * @param expression            post-aggregation expression
 * @param rexNode               RexNode for the post-aggregation expression
 *
 * @return true if the expression can be read directly from the aggregation output, false otherwise
 *
 * @throws ISE if the signature has no type for the directly accessed column
 */
private static boolean postAggregatorComplexDirectColumnIsOk(final RowSignature aggregateRowSignature, final DruidExpression expression, final RexNode rexNode) {
    if (!expression.isDirectColumnAccess()) {
        return false;
    }

    // Resolve both sides of the would-be cast before comparing them.
    final ColumnType signatureType = aggregateRowSignature
        .getColumnType(expression.getDirectColumn())
        .orElseThrow(() -> new ISE("Encountered null type for column[%s]", expression.getDirectColumn()));
    final ColumnType rexType = Calcites.getColumnTypeForRelDataType(rexNode.getType());

    if (!signatureType.is(ValueType.COMPLEX)) {
        return false;
    }
    return signatureType.equals(rexType);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ISE(org.apache.druid.java.util.common.ISE)

Example 35 with ColumnType

use of org.apache.druid.segment.column.ColumnType in project druid by druid-io.

the class SystemSchemaTest method verifyTypes.

/**
 * Asserts that every value in {@code rows} has the Java class implied by the corresponding column of
 * {@code signature}.
 *
 * LONG, FLOAT and DOUBLE columns must hold {@link Long}, {@link Float} and {@link Double} values; STRING columns
 * must hold {@link String} values, except the column named "segment_id", which must hold {@code SegmentId}
 * values. A null value is accepted only when the matching field of the derived {@link RelDataType} is nullable.
 *
 * @param rows      result rows to check; each row must have exactly {@code signature.size()} entries
 * @param signature row signature describing the expected column names and types
 *
 * @throws ISE if the signature has no type for a column
 * @throws IAE if a column has a type other than LONG, FLOAT, DOUBLE or STRING
 */
private static void verifyTypes(final List<Object[]> rows, final RowSignature signature) {
    final RelDataType rowType = RowSignatures.toRelDataType(signature, new JavaTypeFactoryImpl());
    for (Object[] row : rows) {
        // JUnit's contract is assertEquals(expected, actual): the signature defines the expected row width, so
        // it goes first and a failure message reports the two values the right way round.
        Assert.assertEquals(signature.size(), row.length);
        for (int i = 0; i < row.length; i++) {
            final Class<?> expectedClass;
            final ColumnType columnType = signature.getColumnType(i).orElseThrow(() -> new ISE("Encountered null column type"));
            final boolean nullable = rowType.getFieldList().get(i).getType().isNullable();
            switch(columnType.getType()) {
                case LONG:
                    expectedClass = Long.class;
                    break;
                case FLOAT:
                    expectedClass = Float.class;
                    break;
                case DOUBLE:
                    expectedClass = Double.class;
                    break;
                case STRING:
                    // segment_id is declared as a STRING column but its values are SegmentId objects
                    if (signature.getColumnName(i).equals("segment_id")) {
                        expectedClass = SegmentId.class;
                    } else {
                        expectedClass = String.class;
                    }
                    break;
                default:
                    throw new IAE("Don't know what class to expect for valueType[%s]", columnType);
            }
            if (nullable) {
                // nullable column: null is fine, anything else must be of the expected class
                Assert.assertTrue(StringUtils.format("Column[%s] is a [%s] or null (was %s)", signature.getColumnName(i), expectedClass.getName(), row[i] == null ? null : row[i].getClass().getName()), row[i] == null || expectedClass.isAssignableFrom(row[i].getClass()));
            } else {
                // non-nullable column: the value must be present and of the expected class
                Assert.assertTrue(StringUtils.format("Column[%s] is a [%s] (was %s)", signature.getColumnName(i), expectedClass.getName(), row[i] == null ? null : row[i].getClass().getName()), row[i] != null && expectedClass.isAssignableFrom(row[i].getClass()));
            }
        }
    }
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) JavaTypeFactoryImpl(org.apache.calcite.jdbc.JavaTypeFactoryImpl) RelDataType(org.apache.calcite.rel.type.RelDataType) ISE(org.apache.druid.java.util.common.ISE) IAE(org.apache.druid.java.util.common.IAE)

Aggregations

ColumnType (org.apache.druid.segment.column.ColumnType)43 Nullable (javax.annotation.Nullable)16 ISE (org.apache.druid.java.util.common.ISE)15 RowSignature (org.apache.druid.segment.column.RowSignature)14 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)13 RexNode (org.apache.calcite.rex.RexNode)12 DruidExpression (org.apache.druid.sql.calcite.expression.DruidExpression)12 List (java.util.List)11 IAE (org.apache.druid.java.util.common.IAE)11 RelDataType (org.apache.calcite.rel.type.RelDataType)9 DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)9 Collectors (java.util.stream.Collectors)8 ArrayList (java.util.ArrayList)7 SqlAggFunction (org.apache.calcite.sql.SqlAggFunction)5 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)5 StringComparator (org.apache.druid.query.ordering.StringComparator)5 Aggregation (org.apache.druid.sql.calcite.aggregation.Aggregation)5 JsonCreator (com.fasterxml.jackson.annotation.JsonCreator)4 Preconditions (com.google.common.base.Preconditions)4 Collections (java.util.Collections)4