Search in sources :

Example 51 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class IncrementalIndexColumnSelectorFactory method makeDimensionSelectorUndecorated.

/**
 * Builds the raw (undecorated) {@link DimensionSelector} for the column named by {@code dimensionSpec}.
 *
 * <ul>
 *   <li>The time column is served by a {@link SingleScanTimeDimensionSelector}.</li>
 *   <li>A column known to the index as a dimension is delegated to that dimension's indexer.</li>
 *   <li>Any other column is wrapped as a numeric dimension selector when its capabilities report a numeric
 *   type; otherwise a constant all-null selector is returned.</li>
 * </ul>
 *
 * @param dimensionSpec spec supplying the column name and the (possibly null) extraction function
 * @return a selector for the requested column
 */
private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec) {
    final String dimension = dimensionSpec.getDimension();
    final ExtractionFn extractionFn = dimensionSpec.getExtractionFn();

    // the time column has its own selector implementation
    if (dimension.equals(ColumnHolder.TIME_COLUMN_NAME)) {
        return new SingleScanTimeDimensionSelector(makeColumnValueSelector(dimension), extractionFn, descending);
    }

    final IncrementalIndex.DimensionDesc desc = index.getDimension(dimensionSpec.getDimension());
    if (desc != null) {
        // a real dimension: its indexer knows how to build the selector
        return desc.getIndexer().makeDimensionSelector(dimensionSpec, rowHolder, desc);
    }

    // not a dimension, column may be a metric
    final ColumnCapabilities metricCapabilities = getColumnCapabilities(dimension);
    final boolean canWrapAsNumeric = metricCapabilities != null && metricCapabilities.isNumeric();
    if (!canWrapAsNumeric) {
        // unknown column, or one we can't wrap: just return a column of all nulls
        return DimensionSelector.constant(null, extractionFn);
    }
    return ValueTypes.makeNumericWrappingDimensionSelector(
        metricCapabilities.getType(),
        makeColumnValueSelector(dimension),
        extractionFn
    );
}
Also used : ExtractionFn(org.apache.druid.query.extraction.ExtractionFn) DimensionIndexer(org.apache.druid.segment.DimensionIndexer) SingleScanTimeDimensionSelector(org.apache.druid.segment.SingleScanTimeDimensionSelector) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities)

Example 52 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class ExpressionSelectors method createBindings.

/**
 * Create {@link Expr.ObjectBinding} given a {@link ColumnSelectorFactory} and {@link ExpressionPlan} which
 * provides the set of identifiers which need a binding (list of required columns), and context of whether or not they
 * are used as array or scalar inputs.
 *
 * <p>For each required column a value supplier is chosen from the column's capabilities: an object selector for
 * unknown or array-typed columns (and for some array-producing expressions), a nullable numeric supplier for
 * FLOAT / LONG / DOUBLE, a dimension selector for STRING, and a plain object pass-through for everything else.
 * Columns for which no supplier could be made are left out of the binding.
 *
 * @param columnSelectorFactory source of column capabilities and selectors
 * @param plan                  expression plan listing the required bindings and the expression traits
 * @return nil bindings when no column produced a supplier, a single-supplier binding when the expression needs
 *         exactly one column, otherwise a map-backed binding
 */
public static Expr.ObjectBinding createBindings(ColumnSelectorFactory columnSelectorFactory, ExpressionPlan plan) {
    final List<String> columns = plan.getAnalysis().getRequiredBindingsList();
    final Map<String, Pair<ExpressionType, Supplier<Object>>> suppliers = new HashMap<>();

    for (String columnName : columns) {
        final ColumnCapabilities capabilities = columnSelectorFactory.getColumnCapabilities(columnName);
        final boolean multiVal = capabilities != null && capabilities.hasMultipleValues().isTrue();
        final ExpressionType expressionType = ExpressionType.fromColumnType(capabilities);

        final boolean useObjectSupplierForMultiValueStringArray;
        if (capabilities == null) {
            useObjectSupplierForMultiValueStringArray = false;
        } else {
            // for string columns that need homogenized nulls, or whose expression is implicitly mapped across
            // multi-value rows, we can just use the dimension selector, which has the homogenization behavior built-in
            final boolean dimensionSelectorNotRequired =
                !capabilities.is(ValueType.STRING)
                || (!ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
                    && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
            // ... and the object supplier only matters when the expression has array output
            useObjectSupplierForMultiValueStringArray =
                dimensionSelectorNotRequired && plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
        }

        final boolean homogenizeNullMultiValueStringArrays =
            plan.is(ExpressionPlan.Trait.NEEDS_APPLIED) || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();

        final Supplier<Object> supplier;
        if (capabilities == null || capabilities.isArray() || useObjectSupplierForMultiValueStringArray) {
            // Unknown type, array type, or output array uses an Object selector and see if that gives anything useful
            supplier = supplierFromObjectSelector(
                columnSelectorFactory.makeColumnValueSelector(columnName),
                homogenizeNullMultiValueStringArrays
            );
        } else if (capabilities.is(ValueType.FLOAT)) {
            final ColumnValueSelector<?> floatSelector = columnSelectorFactory.makeColumnValueSelector(columnName);
            supplier = makeNullableNumericSupplier(floatSelector, floatSelector::getFloat);
        } else if (capabilities.is(ValueType.LONG)) {
            final ColumnValueSelector<?> longSelector = columnSelectorFactory.makeColumnValueSelector(columnName);
            supplier = makeNullableNumericSupplier(longSelector, longSelector::getLong);
        } else if (capabilities.is(ValueType.DOUBLE)) {
            final ColumnValueSelector<?> doubleSelector = columnSelectorFactory.makeColumnValueSelector(columnName);
            supplier = makeNullableNumericSupplier(doubleSelector, doubleSelector::getDouble);
        } else if (capabilities.is(ValueType.STRING)) {
            supplier = supplierFromDimensionSelector(
                columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(columnName, columnName)),
                multiVal,
                homogenizeNullMultiValueStringArrays
            );
        } else {
            // complex type just pass straight through, unless the selector is the nil selector
            final ColumnValueSelector<?> objectSelector = columnSelectorFactory.makeColumnValueSelector(columnName);
            if (objectSelector instanceof NilColumnValueSelector) {
                supplier = null;
            } else {
                supplier = objectSelector::getObject;
            }
        }

        if (supplier != null) {
            suppliers.put(columnName, new Pair<>(expressionType, supplier));
        }
    }

    if (suppliers.isEmpty()) {
        return InputBindings.nilBindings();
    }
    if (suppliers.size() != 1 || columns.size() != 1) {
        return InputBindings.withTypedSuppliers(suppliers);
    }

    // If there's only one column (and it has a supplier), we can skip the Map and just use that supplier when
    // asked for something.
    final String onlyColumn = Iterables.getOnlyElement(suppliers.keySet());
    final Pair<ExpressionType, Supplier<Object>> onlyBinding = Iterables.getOnlyElement(suppliers.values());
    return new Expr.ObjectBinding() {

        @Nullable
        @Override
        public Object get(String name) {
            // There's only one binding, and it must be the single column, so the name can safely be ignored in
            // production (the check only runs when assertions are enabled).
            assert onlyColumn.equals(name);
            return onlyBinding.rhs.get();
        }

        @Nullable
        @Override
        public ExpressionType getType(String name) {
            return onlyBinding.lhs;
        }
    };
}
Also used : HashMap(java.util.HashMap) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) NilColumnValueSelector(org.apache.druid.segment.NilColumnValueSelector) Expr(org.apache.druid.math.expr.Expr) ExpressionType(org.apache.druid.math.expr.ExpressionType) Nullable(javax.annotation.Nullable) Pair(org.apache.druid.java.util.common.Pair) NonnullPair(org.apache.druid.java.util.common.NonnullPair) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) NilColumnValueSelector(org.apache.druid.segment.NilColumnValueSelector) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector)

Example 53 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class ExpressionPlanner method plan.

/**
 * Druid tries to be chill to expressions to make up for not having a well defined table schema across segments. This
 * method performs some analysis to determine what sort of selectors can be constructed on top of an expression,
 * whether or not the expression will need implicitly mapped across multi-valued inputs, if the expression produces
 * multi-valued outputs, is vectorizable, and everything else interesting when making a selector.
 *
 * Results are stored in a {@link ExpressionPlan}, which can be examined to do whatever is necessary to make things
 * function properly.
 *
 * @param inspector  source of {@link ColumnCapabilities} for the columns the expression reads
 * @param expression the parsed expression to analyze
 * @return an {@link ExpressionPlan} carrying the computed traits, the output type (null when it was not computed),
 *         the single input type (null unless a single-input trait was set), the set of inputs with unknown or
 *         possibly-multi-valued capabilities, and the list of inputs which need implicit mapping
 */
public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) {
    final Expr.BindingAnalysis analysis = expression.analyzeInputs();
    Parser.validateExpr(expression, analysis);
    EnumSet<ExpressionPlan.Trait> traits = EnumSet.noneOf(ExpressionPlan.Trait.class);
    // inputs for which the inspector returned no capabilities at all
    Set<String> noCapabilities = new HashSet<>();
    // string inputs which might be multi-valued and which the expression does not use as an array
    Set<String> maybeMultiValued = new HashSet<>();
    List<String> needsApplied = ImmutableList.of();
    ColumnType singleInputType = null;
    ExpressionType outputType = null;
    final Set<String> columns = analysis.getRequiredBindings();
    // check and set traits which allow optimized selectors to be created
    if (columns.isEmpty()) {
        traits.add(ExpressionPlan.Trait.CONSTANT);
    } else if (expression.isIdentifier()) {
        traits.add(ExpressionPlan.Trait.IDENTIFIER);
    } else if (columns.size() == 1) {
        final String column = Iterables.getOnlyElement(columns);
        final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
        // the single-input traits are only considered when the column has known capabilities and the usage is scalar
        // (i.e. the expression is not treating its input as an array and not wanting to output an array)
        if (capabilities != null && !analysis.hasInputArrays() && !analysis.isOutputArray()) {
            boolean isSingleInputMappable = false;
            // scalar: the column is definitely not multi-valued ...
            boolean isSingleInputScalar = capabilities.hasMultipleValues().isFalse();
            if (capabilities.is(ValueType.STRING)) {
                // ... and, for strings, is additionally dictionary encoded
                isSingleInputScalar &= capabilities.isDictionaryEncoded().isTrue();
                // mappable: a dictionary encoded string column whose multi-valued-ness is known either way
                isSingleInputMappable = capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isUnknown();
            }
            // if satisfied, set single input output type and flags
            if (isSingleInputScalar || isSingleInputMappable) {
                singleInputType = capabilities.toColumnType();
                if (isSingleInputScalar) {
                    traits.add(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
                }
                if (isSingleInputMappable) {
                    traits.add(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
                }
            }
        }
    }
    // if the expression was not classified as single input scalar, constant, or identifier, it might need
    // automatic transformation to map across multi-valued inputs (or row by row detection in the worst case)
    if (ExpressionPlan.none(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.CONSTANT, ExpressionPlan.Trait.IDENTIFIER)) {
        final Set<String> definitelyMultiValued = new HashSet<>();
        final Set<String> definitelyArray = new HashSet<>();
        // classify every input; the else-if chain puts a column into at most one of the sets
        for (String column : analysis.getRequiredBindings()) {
            final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
            if (capabilities != null) {
                if (capabilities.isArray()) {
                    definitelyArray.add(column);
                } else if (capabilities.is(ValueType.STRING) && capabilities.hasMultipleValues().isTrue()) {
                    definitelyMultiValued.add(column);
                } else if (capabilities.is(ValueType.STRING) && capabilities.hasMultipleValues().isMaybeTrue() && !analysis.getArrayBindings().contains(column)) {
                    maybeMultiValued.add(column);
                }
            } else {
                noCapabilities.add(column);
            }
        }
        // find any inputs which will need implicitly mapped across multi-valued rows: definitely multi-valued
        // string inputs which the expression does not already use as arrays
        needsApplied = columns.stream().filter(c -> !definitelyArray.contains(c) && definitelyMultiValued.contains(c) && !analysis.getArrayBindings().contains(c)).collect(Collectors.toList());
        // if any multi-value inputs, set flag for non-scalar inputs
        if (analysis.hasInputArrays()) {
            traits.add(ExpressionPlan.Trait.NON_SCALAR_INPUTS);
        }
        if (!noCapabilities.isEmpty()) {
            traits.add(ExpressionPlan.Trait.UNKNOWN_INPUTS);
        }
        if (!maybeMultiValued.isEmpty()) {
            traits.add(ExpressionPlan.Trait.INCOMPLETE_INPUTS);
        }
        // if expression needs transformed, lets do it
        if (!needsApplied.isEmpty()) {
            traits.add(ExpressionPlan.Trait.NEEDS_APPLIED);
        }
    }
    // only set output type if we are pretty confident about input types
    final boolean shouldComputeOutput = ExpressionPlan.none(traits, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS);
    if (shouldComputeOutput) {
        outputType = expression.getOutputType(inspector);
    }
    // if analysis predicts output, or inferred output type, is array, output will be arrays
    if (analysis.isOutputArray() || (outputType != null && outputType.isArray())) {
        traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
        // single input mappable may not produce array output explicitly, only through implicit mapping
        traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
        traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
    }
    // vectorized expressions do not support incomplete, multi-valued inputs or outputs, or implicit mapping
    // they also do not support unknown inputs, but they also do not currently have to deal with them, as missing
    // capabilities is indicative of a non-existent column instead of an unknown schema. If this ever changes,
    // this check should also change
    boolean supportsVector = ExpressionPlan.none(traits, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
    if (supportsVector && expression.canVectorize(inspector)) {
        // make sure to compute the output type for a vector expression though, because we might have skipped it earlier
        // due to unknown inputs, but that's ok here since it just means it doesn't exist
        outputType = expression.getOutputType(inspector);
        traits.add(ExpressionPlan.Trait.VECTORIZABLE);
    }
    // the "unknown inputs" handed to the plan are both the inputs without capabilities and the maybe-multi-valued ones
    return new ExpressionPlan(inspector, expression, analysis, traits, outputType, singleInputType, Sets.union(noCapabilities, maybeMultiValued), needsApplied);
}
Also used : ColumnType(org.apache.druid.segment.column.ColumnType) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Expr(org.apache.druid.math.expr.Expr) ExpressionType(org.apache.druid.math.expr.ExpressionType) HashSet(java.util.HashSet)

Example 54 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class HashJoinSegmentStorageAdapterTest method test_getColumnCapabilities_factToCountryNonexistentFactColumn.

@Test
public void test_getColumnCapabilities_factToCountryNonexistentFactColumn() {
    // a column that does not exist on the fact-to-country segment must report no capabilities
    Assert.assertNull(makeFactToCountrySegment().getColumnCapabilities("nonexistent"));
}
Also used : ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Test(org.junit.Test)

Example 55 with ColumnCapabilities

use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.

the class LookupJoinableTest method getColumnCapabilitiesForUnknownColumnShouldReturnNull.

@Test
public void getColumnCapabilitiesForUnknownColumnShouldReturnNull() {
    // asking the joinable under test about an unknown column must yield null capabilities
    Assert.assertNull(target.getColumnCapabilities(UNKNOWN_COLUMN));
}
Also used : ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) Test(org.junit.Test) NullHandlingTest(org.apache.druid.common.config.NullHandlingTest)

Aggregations

ColumnCapabilities (org.apache.druid.segment.column.ColumnCapabilities)156 Test (org.junit.Test)104 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)58 ColumnCapabilitiesImpl (org.apache.druid.segment.column.ColumnCapabilitiesImpl)18 ArrayList (java.util.ArrayList)8 Nullable (javax.annotation.Nullable)8 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)8 DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)8 ColumnValueSelector (org.apache.druid.segment.ColumnValueSelector)8 ColumnHolder (org.apache.druid.segment.column.ColumnHolder)8 ColumnType (org.apache.druid.segment.column.ColumnType)8 RowSignature (org.apache.druid.segment.column.RowSignature)8 ValueType (org.apache.druid.segment.column.ValueType)7 List (java.util.List)6 NullHandlingTest (org.apache.druid.common.config.NullHandlingTest)6 Pair (org.apache.druid.java.util.common.Pair)6 Expr (org.apache.druid.math.expr.Expr)6 ExpressionType (org.apache.druid.math.expr.ExpressionType)6 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)6 DimensionIndexer (org.apache.druid.segment.DimensionIndexer)6