use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class IncrementalIndexColumnSelectorFactory method makeDimensionSelectorUndecorated.
/**
 * Builds the {@link DimensionSelector} for the column named by {@code dimensionSpec}.
 *
 * Resolution order:
 * <ol>
 * <li>the time column is served by a {@link SingleScanTimeDimensionSelector};</li>
 * <li>a column known to the index as a dimension is delegated to that dimension's indexer;</li>
 * <li>a non-dimension column with numeric capabilities is wrapped as a numeric dimension selector;</li>
 * <li>anything else (no capabilities, or a type that cannot be wrapped) yields a constant null selector.</li>
 * </ol>
 *
 * @param dimensionSpec spec providing the column name and the extraction function
 * @return the selector chosen by the rules above
 */
private DimensionSelector makeDimensionSelectorUndecorated(DimensionSpec dimensionSpec) {
  final String columnName = dimensionSpec.getDimension();
  final ExtractionFn fn = dimensionSpec.getExtractionFn();

  if (columnName.equals(ColumnHolder.TIME_COLUMN_NAME)) {
    return new SingleScanTimeDimensionSelector(makeColumnValueSelector(columnName), fn, descending);
  }

  final IncrementalIndex.DimensionDesc desc = index.getDimension(dimensionSpec.getDimension());
  if (desc != null) {
    // a real dimension: its indexer knows how to build the selector
    return desc.getIndexer().makeDimensionSelector(dimensionSpec, rowHolder, desc);
  }

  // not a dimension, column may be a metric
  final ColumnCapabilities caps = getColumnCapabilities(columnName);
  final boolean canWrapAsNumeric = caps != null && caps.isNumeric();
  if (!canWrapAsNumeric) {
    // unknown column, or a base column we can't wrap: just return a column of all nulls
    return DimensionSelector.constant(null, fn);
  }
  return ValueTypes.makeNumericWrappingDimensionSelector(
      caps.getType(),
      makeColumnValueSelector(columnName),
      fn
  );
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class ExpressionSelectors method createBindings.
/**
 * Builds the {@link Expr.ObjectBinding} for an expression from a {@link ColumnSelectorFactory} and the
 * {@link ExpressionPlan} describing that expression. The plan supplies the identifiers that need a binding (its
 * list of required columns) and the traits that decide how each column is read.
 *
 * Columns for which no supplier can be made are left out. With no suppliers at all the nil bindings are
 * returned; with exactly one required column that has a supplier, a lightweight single-column binding is
 * returned instead of a map-backed one.
 */
public static Expr.ObjectBinding createBindings(ColumnSelectorFactory columnSelectorFactory, ExpressionPlan plan) {
  final List<String> requiredColumns = plan.getAnalysis().getRequiredBindingsList();
  final Map<String, Pair<ExpressionType, Supplier<Object>>> typedSuppliers = new HashMap<>();

  for (String requiredColumn : requiredColumns) {
    final ColumnCapabilities caps = columnSelectorFactory.getColumnCapabilities(requiredColumn);
    final boolean hasMultipleValues = caps != null && caps.hasMultipleValues().isTrue();
    final ExpressionType type = ExpressionType.fromColumnType(caps);

    // Decide whether a column with known capabilities should be read through a plain object selector.
    final boolean readThroughObjectSelector;
    if (caps == null) {
      readThroughObjectSelector = false;
    } else {
      // Non-string columns always qualify; string columns qualify only when null homogenization is disabled
      // and the plan does not need implicit mapping.
      final boolean typeQualifies = !caps.is(ValueType.STRING)
                                    || (!ExpressionProcessing.isHomogenizeNullMultiValueStringArrays()
                                        && !plan.is(ExpressionPlan.Trait.NEEDS_APPLIED));
      // ...and in either case only when the expression has array output
      readThroughObjectSelector = typeQualifies && plan.is(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
    }

    final boolean homogenizeNulls = plan.is(ExpressionPlan.Trait.NEEDS_APPLIED)
                                    || ExpressionProcessing.isHomogenizeNullMultiValueStringArrays();

    final Supplier<Object> valueSupplier;
    if (caps == null || caps.isArray() || readThroughObjectSelector) {
      // Unknown type, array type, or array output: use an Object selector and see if that gives anything useful
      valueSupplier = supplierFromObjectSelector(
          columnSelectorFactory.makeColumnValueSelector(requiredColumn),
          homogenizeNulls
      );
    } else if (caps.is(ValueType.FLOAT)) {
      final ColumnValueSelector<?> floats = columnSelectorFactory.makeColumnValueSelector(requiredColumn);
      valueSupplier = makeNullableNumericSupplier(floats, floats::getFloat);
    } else if (caps.is(ValueType.LONG)) {
      final ColumnValueSelector<?> longs = columnSelectorFactory.makeColumnValueSelector(requiredColumn);
      valueSupplier = makeNullableNumericSupplier(longs, longs::getLong);
    } else if (caps.is(ValueType.DOUBLE)) {
      final ColumnValueSelector<?> doubles = columnSelectorFactory.makeColumnValueSelector(requiredColumn);
      valueSupplier = makeNullableNumericSupplier(doubles, doubles::getDouble);
    } else if (caps.is(ValueType.STRING)) {
      valueSupplier = supplierFromDimensionSelector(
          columnSelectorFactory.makeDimensionSelector(new DefaultDimensionSpec(requiredColumn, requiredColumn)),
          hasMultipleValues,
          homogenizeNulls
      );
    } else {
      // complex type: pass the object straight through, unless the selector is the nil selector
      final ColumnValueSelector<?> objects = columnSelectorFactory.makeColumnValueSelector(requiredColumn);
      valueSupplier = (objects instanceof NilColumnValueSelector) ? null : objects::getObject;
    }

    if (valueSupplier != null) {
      typedSuppliers.put(requiredColumn, new Pair<>(type, valueSupplier));
    }
  }

  if (typedSuppliers.isEmpty()) {
    return InputBindings.nilBindings();
  }
  if (typedSuppliers.size() != 1 || requiredColumns.size() != 1) {
    return InputBindings.withTypedSuppliers(typedSuppliers);
  }

  // Only one column (and it has a supplier): skip the Map and answer every lookup from that supplier.
  final String onlyColumn = Iterables.getOnlyElement(typedSuppliers.keySet());
  final Pair<ExpressionType, Supplier<Object>> onlySupplier = Iterables.getOnlyElement(typedSuppliers.values());
  return new Expr.ObjectBinding() {
    @Nullable
    @Override
    public Object get(String name) {
      // There's only one binding, and it must be the single column, so the name can safely be ignored in
      // production.
      assert onlyColumn.equals(name);
      return onlySupplier.rhs.get();
    }

    @Nullable
    @Override
    public ExpressionType getType(String name) {
      return onlySupplier.lhs;
    }
  };
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class ExpressionPlanner method plan.
/**
* Druid tries to be chill to expressions to make up for not having a well defined table schema across segments. This
* method performs some analysis to determine what sort of selectors can be constructed on top of an expression,
* whether or not the expression will need implicitly mapped across multi-valued inputs, if the expression produces
* multi-valued outputs, is vectorizable, and everything else interesting when making a selector.
*
* Results are stored in a {@link ExpressionPlan}, which can be examined to do whatever is necessary to make things
* function properly.
*
* @param inspector source of {@link ColumnCapabilities} for the expression's required columns; it is also handed to
* the expression when inferring its output type and when checking whether it can vectorize
* @param expression the expression to analyze; it is validated against its own input analysis before planning
* @return a plan holding the computed traits, the output type (may be null), the single input type (null unless a
* single-input optimization was detected), the union of inputs with missing capabilities and inputs that may be
* multi-valued, and the list of inputs that need to be implicitly mapped
*/
public static ExpressionPlan plan(ColumnInspector inspector, Expr expression) {
final Expr.BindingAnalysis analysis = expression.analyzeInputs();
Parser.validateExpr(expression, analysis);
EnumSet<ExpressionPlan.Trait> traits = EnumSet.noneOf(ExpressionPlan.Trait.class);
// columns for which the inspector returned no capabilities
Set<String> noCapabilities = new HashSet<>();
// string columns that might be multi-valued and are not used as array inputs
Set<String> maybeMultiValued = new HashSet<>();
List<String> needsApplied = ImmutableList.of();
ColumnType singleInputType = null;
ExpressionType outputType = null;
final Set<String> columns = analysis.getRequiredBindings();
// check and set traits which allow optimized selectors to be created
if (columns.isEmpty()) {
traits.add(ExpressionPlan.Trait.CONSTANT);
} else if (expression.isIdentifier()) {
traits.add(ExpressionPlan.Trait.IDENTIFIER);
} else if (columns.size() == 1) {
final String column = Iterables.getOnlyElement(columns);
final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
// the single-input flags are only considered when the column's capabilities are known and the input is implicitly
// mappable (i.e. the expression is not treating its input as an array and not wanting to output an array)
if (capabilities != null && !analysis.hasInputArrays() && !analysis.isOutputArray()) {
boolean isSingleInputMappable = false;
boolean isSingleInputScalar = capabilities.hasMultipleValues().isFalse();
if (capabilities.is(ValueType.STRING)) {
// strings additionally require dictionary encoding for either flag; mappable also requires that it is known
// whether or not the column has multiple values
isSingleInputScalar &= capabilities.isDictionaryEncoded().isTrue();
isSingleInputMappable = capabilities.isDictionaryEncoded().isTrue() && !capabilities.hasMultipleValues().isUnknown();
}
// if satisfied, set single input output type and flags
if (isSingleInputScalar || isSingleInputMappable) {
singleInputType = capabilities.toColumnType();
if (isSingleInputScalar) {
traits.add(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
}
if (isSingleInputMappable) {
traits.add(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
}
}
}
}
// automatic transformation to map across multi-valued inputs (or row by row detection in the worst case)
if (ExpressionPlan.none(traits, ExpressionPlan.Trait.SINGLE_INPUT_SCALAR, ExpressionPlan.Trait.CONSTANT, ExpressionPlan.Trait.IDENTIFIER)) {
final Set<String> definitelyMultiValued = new HashSet<>();
final Set<String> definitelyArray = new HashSet<>();
// classify every required column by what its capabilities say about arrays and multiple values
for (String column : analysis.getRequiredBindings()) {
final ColumnCapabilities capabilities = inspector.getColumnCapabilities(column);
if (capabilities != null) {
if (capabilities.isArray()) {
definitelyArray.add(column);
} else if (capabilities.is(ValueType.STRING) && capabilities.hasMultipleValues().isTrue()) {
definitelyMultiValued.add(column);
} else if (capabilities.is(ValueType.STRING) && capabilities.hasMultipleValues().isMaybeTrue() && !analysis.getArrayBindings().contains(column)) {
maybeMultiValued.add(column);
}
} else {
noCapabilities.add(column);
}
}
// find any inputs which will need implicitly mapped across multi-valued rows: definitely multi-valued, not an
// array column, and not already used as an array input by the expression
needsApplied = columns.stream().filter(c -> !definitelyArray.contains(c) && definitelyMultiValued.contains(c) && !analysis.getArrayBindings().contains(c)).collect(Collectors.toList());
// if any multi-value inputs, set flag for non-scalar inputs
if (analysis.hasInputArrays()) {
traits.add(ExpressionPlan.Trait.NON_SCALAR_INPUTS);
}
if (!noCapabilities.isEmpty()) {
traits.add(ExpressionPlan.Trait.UNKNOWN_INPUTS);
}
if (!maybeMultiValued.isEmpty()) {
traits.add(ExpressionPlan.Trait.INCOMPLETE_INPUTS);
}
// if expression needs transformed, lets do it
if (!needsApplied.isEmpty()) {
traits.add(ExpressionPlan.Trait.NEEDS_APPLIED);
}
}
// only set output type if we are pretty confident about input types
final boolean shouldComputeOutput = ExpressionPlan.none(traits, ExpressionPlan.Trait.UNKNOWN_INPUTS, ExpressionPlan.Trait.INCOMPLETE_INPUTS);
if (shouldComputeOutput) {
outputType = expression.getOutputType(inspector);
}
// if analysis predicts output, or inferred output type, is array, output will be arrays
if (analysis.isOutputArray() || (outputType != null && outputType.isArray())) {
traits.add(ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
// single input mappable may not produce array output explicitly, only through implicit mapping
// (singleInputType is left as previously set; only the traits are withdrawn here)
traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_SCALAR);
traits.remove(ExpressionPlan.Trait.SINGLE_INPUT_MAPPABLE);
}
// vectorized expressions do not support incomplete, multi-valued inputs or outputs, or implicit mapping
// they also do not support unknown inputs, but they also do not currently have to deal with them, as missing
// capabilities is indicative of a non-existent column instead of an unknown schema. If this ever changes,
// this check should also change
boolean supportsVector = ExpressionPlan.none(traits, ExpressionPlan.Trait.INCOMPLETE_INPUTS, ExpressionPlan.Trait.NEEDS_APPLIED, ExpressionPlan.Trait.NON_SCALAR_INPUTS, ExpressionPlan.Trait.NON_SCALAR_OUTPUT);
if (supportsVector && expression.canVectorize(inspector)) {
// make sure to compute the output type for a vector expression though, because we might have skipped it earlier
// due to unknown inputs, but that's ok here since it just means it doesn't exist
outputType = expression.getOutputType(inspector);
traits.add(ExpressionPlan.Trait.VECTORIZABLE);
}
return new ExpressionPlan(inspector, expression, analysis, traits, outputType, singleInputType, Sets.union(noCapabilities, maybeMultiValued), needsApplied);
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class HashJoinSegmentStorageAdapterTest method test_getColumnCapabilities_factToCountryNonexistentFactColumn.
/**
 * Asking the fact-to-country segment for the capabilities of the column named "nonexistent" must yield null.
 */
@Test
public void test_getColumnCapabilities_factToCountryNonexistentFactColumn() {
  Assert.assertNull(makeFactToCountrySegment().getColumnCapabilities("nonexistent"));
}
use of org.apache.druid.segment.column.ColumnCapabilities in project druid by apache.
the class LookupJoinableTest method getColumnCapabilitiesForUnknownColumnShouldReturnNull.
/**
 * Asking the target for the capabilities of {@code UNKNOWN_COLUMN} must yield null.
 */
@Test
public void getColumnCapabilitiesForUnknownColumnShouldReturnNull() {
  Assert.assertNull(target.getColumnCapabilities(UNKNOWN_COLUMN));
}
Aggregations