use of org.apache.druid.math.expr.ExpressionType in project druid by apache.
the class ExpressionPlanner method plan.
/**
 * Inspects an expression, together with whatever column information the {@link ColumnInspector} can supply, and
 * summarizes the findings as an {@link ExpressionPlan}.
 *
 * <p>The analysis determines, in order:
 * <ul>
 *   <li>whether the expression is a constant (no required bindings) or a bare identifier;</li>
 *   <li>whether it reads exactly one column in a way that allows the single-input selector optimizations;</li>
 *   <li>which inputs are arrays, multi-valued strings, possibly multi-valued strings, or have no capabilities at
 *       all, and which of them must be implicitly mapped across their values;</li>
 *   <li>the output type, when all input types are known well enough to trust it;</li>
 *   <li>whether the expression produces arrays, and whether it can be vectorized.</li>
 * </ul>
 *
 * @param inspector  source of {@link ColumnCapabilities} for the columns the expression reads
 * @param expression expression to analyze; it is validated against its own binding analysis first
 *
 * @return a plan carrying the collected traits, the inferred output type (null when not computed), the single
 *         input column type (null unless a single-input trait was detected), the union of inputs with unknown or
 *         incomplete type information, and the list of inputs which need to be implicitly mapped
 */
public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) {
  final Expr.BindingAnalysis analysis = expression.analyzeInputs();
  Parser.validateExpr(expression, analysis);

  final EnumSet<ExpressionPlan.Trait> traits = EnumSet.noneOf(ExpressionPlan.Trait.class);
  final Set<String> unknownColumns = new HashSet<>();
  final Set<String> possiblyMultiValued = new HashSet<>();
  List<String> columnsToApply = ImmutableList.of();
  ColumnType singleInputType = null;
  ExpressionType outputType = null;
  final Set<String> columns = analysis.getRequiredBindings();

  // Step 1: traits that permit optimized selectors. At most one of the three branches applies.
  if (columns.isEmpty()) {
    traits.add(ExpressionPlan.Trait.CONSTANT);
  } else if (expression.isIdentifier()) {
    traits.add(ExpressionPlan.Trait.IDENTIFIER);
  } else if (columns.size() == 1) {
    final ColumnCapabilities capabilities = inspector.getColumnCapabilities(Iterables.getOnlyElement(columns));

    // The single-input traits are only considered when the column is known and the expression neither treats
    // its input as an array nor wants to output an array.
    if (capabilities != null && !(analysis.hasInputArrays() || analysis.isOutputArray())) {
      // SINGLE_INPUT_SCALAR: the input is definitely single valued; strings must also be dictionary encoded.
      boolean scalarInput = capabilities.hasMultipleValues().isFalse();
      // SINGLE_INPUT_MAPPABLE: a dictionary encoded string whose multi-valuedness is known either way.
      boolean mappableInput = false;
      if (capabilities.is(ValueType.STRING)) {
        scalarInput = capabilities.isDictionaryEncoded().isTrue() && scalarInput;
        mappableInput = capabilities.isDictionaryEncoded().isTrue()
                        && !capabilities.hasMultipleValues().isUnknown();
      }

      if (scalarInput || mappableInput) {
        singleInputType = capabilities.toColumnType();
        if (scalarInput) {
          traits.add(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
        }
        if (mappableInput) {
          traits.add(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
        }
      }
    }
  }

  // Step 2: unless one of the simple cases above was detected, classify every input so that multi-valued
  // inputs can be mapped automatically (or detected row by row in the worst case).
  if (ExpressionPlan.none(
      traits,
      ExpressionPlan.Trait.SINGLE_INPUT_SCALAR,
      ExpressionPlan.Trait.CONSTANT,
      ExpressionPlan.Trait.IDENTIFIER
  )) {
    final Set<String> arrayColumns = new HashSet<>();
    final Set<String> multiValuedColumns = new HashSet<>();

    for (String column : analysis.getRequiredBindings()) {
      final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
      if (capabilities == null) {
        unknownColumns.add(column);
        continue;
      }
      if (capabilities.isArray()) {
        arrayColumns.add(column);
        continue;
      }
      if (!capabilities.is(ValueType.STRING)) {
        // only string columns can be multi-valued here; everything else needs no bookkeeping
        continue;
      }
      if (capabilities.hasMultipleValues().isTrue()) {
        multiValuedColumns.add(column);
      } else if (capabilities.hasMultipleValues().isMaybeTrue()
                 && !analysis.getArrayBindings().contains(column)) {
        possiblyMultiValued.add(column);
      }
    }

    // Inputs to map implicitly: definitely multi-valued, not an array column, and not used as an array by the
    // expression itself.
    columnsToApply = columns
        .stream()
        .filter(
            c -> multiValuedColumns.contains(c)
                 && !(arrayColumns.contains(c) || analysis.getArrayBindings().contains(c))
        )
        .collect(Collectors.toList());

    if (!columnsToApply.isEmpty()) {
      traits.add(ExpressionPlan.Trait.NEEDS_APPLIED);
    }
    if (!possiblyMultiValued.isEmpty()) {
      traits.add(ExpressionPlan.Trait.INCOMPLETE_INPUTS);
    }
    if (!unknownColumns.isEmpty()) {
      traits.add(ExpressionPlan.Trait.UNKNOWN_INPUTS);
    }
    if (analysis.hasInputArrays()) {
      traits.add(ExpressionPlan.Trait.NON_SCALAR_INPUTS);
    }
  }

  // Step 3: the output type is only trusted when no input is unknown or incomplete.
  if (ExpressionPlan.none(traits, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS)) {
    outputType = expression.getOutputType(inspector);
  }

  // Step 4: array output, whether predicted by the analysis or by the inferred output type.
  final boolean producesArray = analysis.isOutputArray() || (outputType != null && outputType.isArray());
  if (producesArray) {
    traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
    // The single-input optimizations may not produce array output explicitly, only through implicit mapping,
    // so they are withdrawn here.
    traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
    traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
  }

  // Step 5: vectorization. Vectorized expressions support neither incomplete inputs, implicit mapping, nor
  // non-scalar inputs or outputs. Unknown inputs are not excluded here: missing capabilities currently indicate
  // a non-existent column rather than an unknown schema. If that ever changes, this check must change too.
  final boolean vectorEligible = ExpressionPlan.none(
      traits,
      ExpressionPlan.Trait.INCOMPLETE_INPUTS,
      ExpressionPlan.Trait.NEEDS_APPLIED,
      ExpressionPlan.Trait.NON_SCALAR_INPUTS,
      ExpressionPlan.Trait.NON_SCALAR_OUTPUT
  );
  if (vectorEligible && expression.canVectorize(inspector)) {
    // The output type may have been skipped in step 3 because of unknown inputs; compute it now, since for a
    // vectorizable expression an unknown input just means the column does not exist.
    outputType = expression.getOutputType(inspector);
    traits.add(ExpressionPlan.Trait.VECTORIZABLE);
  }

  return new ExpressionPlan(
      inspector,
      expression,
      analysis,
      traits,
      outputType,
      singleInputType,
      Sets.union(unknownColumns, possiblyMultiValued),
      columnsToApply
  );
}
use of org.apache.druid.math.expr.ExpressionType in project druid by apache.
the class Projection method postAggregatorDirectColumnIsOk.
/**
 * Decides whether a post-aggregation expression can be served by reading a column of the aggregation output
 * directly, i.e. it is a direct column access whose type already matches the type the RexNode asks for, so that
 * no implicit cast is needed.
 *
 * @param aggregateRowSignature signature of the aggregation
 * @param expression            post-aggregation expression
 * @param rexNode               RexNode for the post-aggregation expression
 *
 * @return true if the expression is a direct column access and either the column is of a complex type or its
 *         expression type equals the expression type derived from the RexNode; false otherwise
 *
 * @throws ISE if the aggregation signature has no type for the referenced column
 */
private static boolean postAggregatorDirectColumnIsOk(final RowSignature aggregateRowSignature, final DruidExpression expression, final RexNode rexNode) {
  if (!expression.isDirectColumnAccess()) {
    return false;
  }

  final String directColumn = expression.getDirectColumn();
  final ColumnType storedColumnType = aggregateRowSignature
      .getColumnType(directColumn)
      .orElseThrow(() -> new ISE("Encountered null type for column[%s]", directColumn));

  // There is no real way to cast a complex type, so accept the column as it is.
  if (storedColumnType.is(ValueType.COMPLEX)) {
    return true;
  }

  // A cast is unnecessary exactly when both sides resolve to the same expression type.
  final ExpressionType storedExprType = ExpressionType.fromColumnTypeStrict(storedColumnType);
  final ColumnType requestedColumnType = Calcites.getColumnTypeForRelDataType(rexNode.getType());
  final ExpressionType requestedExprType = ExpressionType.fromColumnTypeStrict(requestedColumnType);
  return storedExprType.equals(requestedExprType);
}
use of org.apache.druid.math.expr.ExpressionType in project druid by apache.
the class VectorComparisonProcessors method makeComparisonProcessor.
/**
 * Picks the vector processor for a comparison of two expressions based on their output types. Asserts that
 * strict booleans are disabled.
 *
 * <p>Selection rules:
 * <ul>
 *   <li>left is a string or has no type, and right is a string or has no type: the string comparison
 *       processor;</li>
 *   <li>left is a string and right is anything else: the doubles processor;</li>
 *   <li>left is typed and not a string, and either side is a double: the doubles processor;</li>
 *   <li>anything else (or a supplier that yields null): delegated to
 *       {@code VectorMathProcessors.makeMathProcessor}.</li>
 * </ul>
 *
 * @return the selected processor, unchecked-cast to the caller's requested vector type
 */
@Deprecated
public static <T> ExprVectorProcessor<T> makeComparisonProcessor(Expr.VectorInputBindingInspector inspector, Expr left, Expr right, Supplier<LongOutStringsInFunctionVectorProcessor> longOutStringsInFunctionVectorProcessor, Supplier<LongOutLongsInFunctionVectorValueProcessor> longOutLongsInProcessor, Supplier<DoubleOutLongDoubleInFunctionVectorValueProcessor> doubleOutLongDoubleInProcessor, Supplier<DoubleOutDoubleLongInFunctionVectorValueProcessor> doubleOutDoubleLongInProcessor, Supplier<DoubleOutDoublesInFunctionVectorValueProcessor> doubleOutDoublesInProcessor) {
  assert !ExpressionProcessing.useStrictBooleans();

  final ExpressionType leftType = left.getOutputType(inspector);
  final ExpressionType rightType = right.getOutputType(inspector);
  final boolean leftIsString = Types.is(leftType, ExprType.STRING);
  final boolean rightIsNullOrString = Types.isNullOr(rightType, ExprType.STRING);

  final ExprVectorProcessor<?> specialized;
  if ((leftIsString || leftType == null) && rightIsNullOrString) {
    // both sides are strings and/or untyped: compare as strings
    specialized = longOutStringsInFunctionVectorProcessor.get();
  } else if (leftIsString) {
    // string on the left, some other known type on the right: compare as doubles
    specialized = doubleOutDoublesInProcessor.get();
  } else if (leftType != null && (leftType.is(ExprType.DOUBLE) || Types.is(rightType, ExprType.DOUBLE))) {
    // typed, non-string left with a double on either side: compare as doubles
    specialized = doubleOutDoublesInProcessor.get();
  } else {
    specialized = null;
  }

  if (specialized == null) {
    // fall through to normal math processor logic
    return VectorMathProcessors.makeMathProcessor(inspector, left, right, longOutLongsInProcessor, doubleOutLongDoubleInProcessor, doubleOutDoubleLongInProcessor, doubleOutDoublesInProcessor);
  }
  return (ExprVectorProcessor<T>) specialized;
}
use of org.apache.druid.math.expr.ExpressionType in project druid by apache.
the class VectorComparisonProcessors method makeBooleanProcessor.
/**
 * Picks the vector processor for a long-valued (boolean) binary operation on two expressions based on their
 * output types.
 *
 * <p>Selection rules:
 * <ul>
 *   <li>left is a string: the strings processor if right is a string or untyped, otherwise the doubles
 *       processor;</li>
 *   <li>right is a string (and left is not): the strings processor if left is untyped, otherwise the doubles
 *       processor;</li>
 *   <li>left is untyped and right is untyped: the strings processor;</li>
 *   <li>left is typed and either side is a double: the doubles processor;</li>
 *   <li>anything else (or a supplier that yields null): delegated to
 *       {@code VectorMathProcessors.makeLongMathProcessor}.</li>
 * </ul>
 *
 * @return the selected processor, unchecked-cast to the caller's requested vector type
 */
public static <T> ExprVectorProcessor<T> makeBooleanProcessor(Expr.VectorInputBindingInspector inspector, Expr left, Expr right, Supplier<LongOutStringsInFunctionVectorProcessor> longOutStringsInFunctionVectorProcessor, Supplier<LongOutLongsInFunctionVectorValueProcessor> longOutLongsInProcessor, Supplier<LongOutLongDoubleInFunctionVectorValueProcessor> longOutLongDoubleInProcessor, Supplier<LongOutDoubleLongInFunctionVectorValueProcessor> longOutDoubleLongInProcessor, Supplier<LongOutDoublesInFunctionVectorValueProcessor> longOutDoublesInProcessor) {
  final ExpressionType leftType = left.getOutputType(inspector);
  final ExpressionType rightType = right.getOutputType(inspector);
  final boolean leftIsString = Types.is(leftType, ExprType.STRING);
  final boolean rightIsString = Types.is(rightType, ExprType.STRING);

  ExprVectorProcessor<?> specialized = null;
  if (leftIsString || rightIsString) {
    // At least one side is a string. The operation stays in string space only when the other side is a string
    // too, or has no type at all; otherwise both sides are handled as doubles.
    final boolean otherSideFitsString;
    if (leftIsString) {
      otherSideFitsString = Types.isNullOr(rightType, ExprType.STRING);
    } else {
      otherSideFitsString = leftType == null;
    }
    if (otherSideFitsString) {
      specialized = longOutStringsInFunctionVectorProcessor.get();
    } else {
      specialized = longOutDoublesInProcessor.get();
    }
  } else if (leftType == null) {
    // untyped left: only handled here when the right side is untyped as well (a string right side was already
    // covered above)
    if (Types.isNullOr(rightType, ExprType.STRING)) {
      specialized = longOutStringsInFunctionVectorProcessor.get();
    }
  } else if (leftType.is(ExprType.DOUBLE) || Types.is(rightType, ExprType.DOUBLE)) {
    specialized = longOutDoublesInProcessor.get();
  }

  if (specialized == null) {
    // fall through to normal math processor logic
    return VectorMathProcessors.makeLongMathProcessor(inspector, left, right, longOutLongsInProcessor, longOutLongDoubleInProcessor, longOutDoubleLongInProcessor, longOutDoublesInProcessor);
  }
  return (ExprVectorProcessor<T>) specialized;
}
use of org.apache.druid.math.expr.ExpressionType in project druid by apache.
the class VectorProcessors method isNull.
/**
 * Builds a vector processor which reports, per row, whether the given expression evaluates to null: 1 for null,
 * 0 otherwise.
 *
 * <p>An expression without an output type yields a constant 1 for every row. String inputs are checked by
 * looking at each value; long and double inputs are checked through the null vector of the evaluated input,
 * where an absent null vector means no row is null.
 *
 * @param inspector provides the maximum vector size and the input type information
 * @param expr      expression whose result is tested for null
 *
 * @return a processor with {@link ExpressionType#LONG} output, unchecked-cast to the caller's requested vector
 *         type
 *
 * @throws RuntimeException whatever {@code Exprs.cannotVectorize()} returns, when the output type of the
 *                          expression is neither string, long nor double (presumably unchecked, since this
 *                          method declares no checked exceptions)
 */
public static <T> ExprVectorProcessor<T> isNull(Expr.VectorInputBindingInspector inspector, Expr expr) {
  final ExpressionType type = expr.getOutputType(inspector);
  if (type == null) {
    return constant(1L, inspector.getMaxVectorSize());
  }

  // Allocated once and shared by every evalVector call of the returned processor.
  final long[] outputValues = new long[inspector.getMaxVectorSize()];

  final ExprVectorProcessor<?> nullCheck;
  if (Types.is(type, ExprType.STRING)) {
    final ExprVectorProcessor<String[]> input = expr.buildVectorized(inspector);
    nullCheck = new ExprVectorProcessor<long[]>() {
      @Override
      public ExprEvalVector<long[]> evalVector(Expr.VectorInputBinding bindings) {
        final ExprEvalVector<String[]> evaluated = input.evalVector(bindings);
        final int rows = bindings.getCurrentVectorSize();
        final String[] strings = evaluated.values();
        for (int row = 0; row < rows; row++) {
          outputValues[row] = strings[row] == null ? 1L : 0L;
        }
        return new ExprEvalLongVector(outputValues, null);
      }

      @Override
      public ExpressionType getOutputType() {
        return ExpressionType.LONG;
      }
    };
  } else if (Types.is(type, ExprType.LONG) || Types.is(type, ExprType.DOUBLE)) {
    // Numeric inputs only need the null vector, so longs and doubles share one implementation.
    final ExprVectorProcessor<?> input = expr.buildVectorized(inspector);
    nullCheck = new ExprVectorProcessor<long[]>() {
      @Override
      public ExprEvalVector<long[]> evalVector(Expr.VectorInputBinding bindings) {
        final ExprEvalVector<?> evaluated = input.evalVector(bindings);
        final int rows = bindings.getCurrentVectorSize();
        final boolean[] nullFlags = evaluated.getNullVector();
        if (nullFlags == null) {
          // no null vector at all: nothing is null, so clear the whole reused buffer
          Arrays.fill(outputValues, 0L);
        } else {
          for (int row = 0; row < rows; row++) {
            outputValues[row] = nullFlags[row] ? 1L : 0L;
          }
        }
        return new ExprEvalLongVector(outputValues, null);
      }

      @Override
      public ExpressionType getOutputType() {
        return ExpressionType.LONG;
      }
    };
  } else {
    throw Exprs.cannotVectorize();
  }

  return (ExprVectorProcessor<T>) nullCheck;
}
Aggregations