Use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.
The class ExpressionSelectorsTest, method test_multi_value_string_bindings.
@Test
public void test_multi_value_string_bindings() {
  final String columnName = "multi-string3";
  for (StorageAdapter adapter : ADAPTERS) {
    Sequence<Cursor> cursorSequence = adapter.makeCursors(null, adapter.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    List<Cursor> flatten = cursorSequence.toList();
    for (Cursor cursor : flatten) {
      ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
      // identifier, uses dimension selector supplier, no null coercion
      ExpressionPlan plan = ExpressionPlanner.plan(adapter, Parser.parse("\"multi-string3\"", TestExprMacroTable.INSTANCE));
      // array output, uses object selector supplier, no null coercion
      ExpressionPlan plan2 = ExpressionPlanner.plan(adapter, Parser.parse("array_append(\"multi-string3\", 'foo')", TestExprMacroTable.INSTANCE));
      // array input, uses dimension selector supplier, no null coercion
      ExpressionPlan plan3 = ExpressionPlanner.plan(adapter, Parser.parse("array_length(\"multi-string3\")", TestExprMacroTable.INSTANCE));
      // used as scalar, has null coercion
      ExpressionPlan plan4 = ExpressionPlanner.plan(adapter, Parser.parse("concat(\"multi-string3\", 'foo')", TestExprMacroTable.INSTANCE));
      Expr.ObjectBinding bindings = ExpressionSelectors.createBindings(factory, plan);
      Expr.ObjectBinding bindings2 = ExpressionSelectors.createBindings(factory, plan2);
      Expr.ObjectBinding bindings3 = ExpressionSelectors.createBindings(factory, plan3);
      Expr.ObjectBinding bindings4 = ExpressionSelectors.createBindings(factory, plan4);
      DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(columnName));
      ColumnValueSelector valueSelector = factory.makeColumnValueSelector(columnName);
      while (!cursor.isDone()) {
        Object dimSelectorVal = dimSelector.getObject();
        Object valueSelectorVal = valueSelector.getObject();
        Object bindingVal = bindings.get(columnName);
        Object bindingVal2 = bindings2.get(columnName);
        Object bindingVal3 = bindings3.get(columnName);
        Object bindingVal4 = bindings4.get(columnName);
        if (dimSelectorVal == null) {
          Assert.assertNull(dimSelectorVal);
          Assert.assertNull(valueSelectorVal);
          Assert.assertNull(bindingVal);
          Assert.assertNull(bindingVal2);
          Assert.assertNull(bindingVal3);
          // binding4 has null coercion
          Assert.assertArrayEquals(new Object[]{null}, (Object[]) bindingVal4);
        } else {
          Assert.assertArrayEquals(((List) dimSelectorVal).toArray(), (Object[]) bindingVal);
          Assert.assertArrayEquals(((List) valueSelectorVal).toArray(), (Object[]) bindingVal);
          Assert.assertArrayEquals(((List) dimSelectorVal).toArray(), (Object[]) bindingVal2);
          Assert.assertArrayEquals(((List) valueSelectorVal).toArray(), (Object[]) bindingVal2);
          Assert.assertArrayEquals(((List) dimSelectorVal).toArray(), (Object[]) bindingVal3);
          Assert.assertArrayEquals(((List) valueSelectorVal).toArray(), (Object[]) bindingVal3);
        }
        cursor.advance();
      }
    }
  }
}
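For context, a minimal sketch (not part of the test above, reusing its adapter/factory/cursor fixtures) of how such bindings are consumed: the parsed Expr is evaluated against the bindings once per cursor row, producing an org.apache.druid.math.expr.ExprEval whose value reflects the multi-value column.

// Hedged sketch, not from ExpressionSelectorsTest: evaluate a planned expression
// against bindings created from the cursor's ColumnSelectorFactory.
Expr expr = Parser.parse("array_length(\"multi-string3\")", TestExprMacroTable.INSTANCE);
ExpressionPlan lengthPlan = ExpressionPlanner.plan(adapter, expr);
Expr.ObjectBinding lengthBindings = ExpressionSelectors.createBindings(factory, lengthPlan);
while (!cursor.isDone()) {
  ExprEval eval = expr.eval(lengthBindings); // reads the current row through the underlying selectors
  Object rowLength = eval.value();           // the array length for this row, or null for a null row
  cursor.advance();
}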
Use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.
The class ExpressionSelectorsTest, method test_single_value_string_bindings.
@Test
public void test_single_value_string_bindings() {
  final String columnName = "string3";
  for (StorageAdapter adapter : ADAPTERS) {
    Sequence<Cursor> cursorSequence = adapter.makeCursors(null, adapter.getInterval(), VirtualColumns.EMPTY, Granularities.ALL, false, null);
    List<Cursor> flatten = cursorSequence.toList();
    for (Cursor cursor : flatten) {
      ColumnSelectorFactory factory = cursor.getColumnSelectorFactory();
      ExpressionPlan plan = ExpressionPlanner.plan(adapter, Parser.parse("\"string3\"", TestExprMacroTable.INSTANCE));
      ExpressionPlan plan2 = ExpressionPlanner.plan(adapter, Parser.parse("concat(\"string3\", 'foo')", TestExprMacroTable.INSTANCE));
      Expr.ObjectBinding bindings = ExpressionSelectors.createBindings(factory, plan);
      Expr.ObjectBinding bindings2 = ExpressionSelectors.createBindings(factory, plan2);
      DimensionSelector dimSelector = factory.makeDimensionSelector(DefaultDimensionSpec.of(columnName));
      ColumnValueSelector valueSelector = factory.makeColumnValueSelector(columnName);
      // the realtime index must be treated as multi-value in case new values are added during processing
      final boolean isMultiVal = factory.getColumnCapabilities(columnName) == null || factory.getColumnCapabilities(columnName).hasMultipleValues().isMaybeTrue();
      while (!cursor.isDone()) {
        Object dimSelectorVal = dimSelector.getObject();
        Object valueSelectorVal = valueSelector.getObject();
        Object bindingVal = bindings.get(columnName);
        Object bindingVal2 = bindings2.get(columnName);
        if (dimSelectorVal == null) {
          Assert.assertNull(dimSelectorVal);
          Assert.assertNull(valueSelectorVal);
          Assert.assertNull(bindingVal);
          if (isMultiVal) {
            Assert.assertNull(((Object[]) bindingVal2)[0]);
          } else {
            Assert.assertNull(bindingVal2);
          }
        } else {
          if (isMultiVal) {
            Assert.assertEquals(dimSelectorVal, ((Object[]) bindingVal)[0]);
            Assert.assertEquals(valueSelectorVal, ((Object[]) bindingVal)[0]);
            Assert.assertEquals(dimSelectorVal, ((Object[]) bindingVal2)[0]);
            Assert.assertEquals(valueSelectorVal, ((Object[]) bindingVal2)[0]);
          } else {
            Assert.assertEquals(dimSelectorVal, bindingVal);
            Assert.assertEquals(valueSelectorVal, bindingVal);
            Assert.assertEquals(dimSelectorVal, bindingVal2);
            Assert.assertEquals(valueSelectorVal, bindingVal2);
          }
        }
        cursor.advance();
      }
    }
  }
}
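Bindings are the low-level entry point; a hedged sketch of a higher-level one follows, assuming ExpressionSelectors.makeExprEvalSelector (which wraps an expression as a per-row ColumnValueSelector) and reusing the test's factory and cursor.

// Hedged sketch, not from the test: a per-row expression selector.
Expr concatExpr = Parser.parse("concat(\"string3\", 'foo')", TestExprMacroTable.INSTANCE);
ColumnValueSelector<ExprEval> exprSelector = ExpressionSelectors.makeExprEvalSelector(factory, concatExpr);
while (!cursor.isDone()) {
  String value = exprSelector.getObject().asString(); // e.g. "afoo" when string3 is "a"
  cursor.advance();
}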
Use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.
The class IncrementalIndex, method makeColumnSelectorFactory.
/**
 * Column selector used at ingestion time for inputs to aggregators.
 *
 * @param virtualColumns virtual columns to wrap the factory in
 * @param agg the aggregator
 * @param in ingestion-time input row supplier
 * @param deserializeComplexMetrics whether complex objects should be deserialized by a {@link ComplexMetricExtractor}
 *
 * @return column selector factory
 */
public static ColumnSelectorFactory makeColumnSelectorFactory(
    final VirtualColumns virtualColumns,
    final AggregatorFactory agg,
    final Supplier<InputRow> in,
    final boolean deserializeComplexMetrics
) {
  // We use RowSignature.empty() because the ColumnInspector here should reflect the InputRow schema, not the
  // IncrementalIndex schema, since we are reading values from the InputRow.
  final RowBasedColumnSelectorFactory<InputRow> baseSelectorFactory =
      RowBasedColumnSelectorFactory.create(RowAdapters.standardRow(), in::get, RowSignature.empty(), true);

  class IncrementalIndexInputRowColumnSelectorFactory implements ColumnSelectorFactory {
    @Override
    public ColumnValueSelector<?> makeColumnValueSelector(final String column) {
      final boolean isComplexMetric = agg.getIntermediateType().is(ValueType.COMPLEX);
      final ColumnValueSelector selector = baseSelectorFactory.makeColumnValueSelector(column);
      if (!isComplexMetric || !deserializeComplexMetrics) {
        return selector;
      } else {
        // Wrap the selector in a special one that uses ComplexMetricSerde to modify incoming objects.
        // For complex aggregators that read from multiple columns, we wrap all of them. This is not ideal,
        // but it has worked so far.
        final String complexTypeName = agg.getIntermediateType().getComplexTypeName();
        final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(complexTypeName);
        if (serde == null) {
          throw new ISE("Don't know how to handle type[%s]", complexTypeName);
        }
        final ComplexMetricExtractor extractor = serde.getExtractor();
        return new ColumnValueSelector() {
          @Override
          public boolean isNull() {
            return selector.isNull();
          }

          @Override
          public long getLong() {
            return selector.getLong();
          }

          @Override
          public float getFloat() {
            return selector.getFloat();
          }

          @Override
          public double getDouble() {
            return selector.getDouble();
          }

          @Override
          public Class classOfObject() {
            return extractor.extractedClass();
          }

          @Nullable
          @Override
          public Object getObject() {
            // Here is where the magic happens: read from "in" directly, don't go through the normal "selector".
            return extractor.extractValue(in.get(), column, agg);
          }

          @Override
          public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            inspector.visit("in", in);
            inspector.visit("selector", selector);
            inspector.visit("extractor", extractor);
          }
        };
      }
    }

    @Override
    public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
      return baseSelectorFactory.makeDimensionSelector(dimensionSpec);
    }

    @Nullable
    @Override
    public ColumnCapabilities getColumnCapabilities(String columnName) {
      return baseSelectorFactory.getColumnCapabilities(columnName);
    }
  }

  return virtualColumns.wrap(new IncrementalIndexInputRowColumnSelectorFactory());
}
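A hedged usage sketch of the factory above; the row holder, count aggregator, and sample row are illustrative stand-ins, not taken from IncrementalIndex itself.

// Hypothetical wiring: one selector factory per aggregator at ingestion time.
final AtomicReference<InputRow> rowHolder = new AtomicReference<>();
final AggregatorFactory aggFactory = new CountAggregatorFactory("rows");
final ColumnSelectorFactory factory = IncrementalIndex.makeColumnSelectorFactory(
    VirtualColumns.EMPTY,
    aggFactory,
    rowHolder::get,  // Supplier<InputRow> pointing at the row currently being ingested
    true             // deserialize complex metrics through ComplexMetricSerde
);
final Aggregator aggregator = aggFactory.factorize(factory);
// publish each incoming row to the holder before the aggregator reads it
rowHolder.set(new MapBasedInputRow(0L, Collections.singletonList("dim"), ImmutableMap.of("dim", "a")));
aggregator.aggregate();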
Use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.
The class FilteredAggregatorTest, method makeColumnSelector.
private ColumnSelectorFactory makeColumnSelector(final TestFloatColumnSelector selector) {
  return new ColumnSelectorFactory() {
    @Override
    public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
      final String dimensionName = dimensionSpec.getDimension();
      if ("dim".equals(dimensionName)) {
        return dimensionSpec.decorate(new AbstractDimensionSelector() {
          @Override
          public IndexedInts getRow() {
            SingleIndexedInt row = new SingleIndexedInt();
            if (selector.getIndex() % 3 == 2) {
              row.setValue(1);
            } else {
              row.setValue(0);
            }
            return row;
          }

          @Override
          public ValueMatcher makeValueMatcher(String value) {
            return DimensionSelectorUtils.makeValueMatcherGeneric(this, value);
          }

          @Override
          public ValueMatcher makeValueMatcher(Predicate<String> predicate) {
            return DimensionSelectorUtils.makeValueMatcherGeneric(this, predicate);
          }

          @Override
          public int getValueCardinality() {
            return 2;
          }

          @Override
          public String lookupName(int id) {
            switch (id) {
              case 0:
                return "a";
              case 1:
                return "b";
              default:
                throw new IllegalArgumentException();
            }
          }

          @Override
          public boolean nameLookupPossibleInAdvance() {
            return true;
          }

          @Nullable
          @Override
          public IdLookup idLookup() {
            return new IdLookup() {
              @Override
              public int lookupId(String name) {
                switch (name) {
                  case "a":
                    return 0;
                  case "b":
                    return 1;
                  default:
                    throw new IllegalArgumentException();
                }
              }
            };
          }

          @Override
          public Class classOfObject() {
            return Object.class;
          }

          @Override
          public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
            // Don't care about runtime shape in tests
          }
        });
      } else {
        throw new UnsupportedOperationException();
      }
    }

    @Override
    public ColumnValueSelector<?> makeColumnValueSelector(String columnName) {
      if ("value".equals(columnName)) {
        return selector;
      } else {
        throw new UnsupportedOperationException();
      }
    }

    @Override
    public ColumnCapabilities getColumnCapabilities(String columnName) {
      ColumnCapabilitiesImpl caps;
      if ("value".equals(columnName)) {
        caps = new ColumnCapabilitiesImpl();
        caps.setType(ColumnType.FLOAT);
        caps.setDictionaryEncoded(false);
        caps.setHasBitmapIndexes(false);
      } else {
        caps = new ColumnCapabilitiesImpl();
        caps.setType(ColumnType.STRING);
        caps.setDictionaryEncoded(true);
        caps.setHasBitmapIndexes(true);
      }
      return caps;
    }
  };
}
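A hedged sketch of how the test exercises this factory: a FilteredAggregatorFactory is factorized against it, so only rows whose "dim" value matches the filter contribute to the sum. The values and the "billy" output name are illustrative, and the sketch assumes the test's TestFloatColumnSelector fixture takes a float[] and exposes increment().

// Hedged sketch: mirrors the shape of FilteredAggregatorTest's assertions.
final float[] values = {0.15f, 0.27f};
final TestFloatColumnSelector selector = new TestFloatColumnSelector(values);
final ColumnSelectorFactory factory = makeColumnSelector(selector);
final FilteredAggregatorFactory aggFactory = new FilteredAggregatorFactory(
    new DoubleSumAggregatorFactory("billy", "value"),
    new SelectorDimFilter("dim", "a", null)
);
final Aggregator agg = aggFactory.factorize(factory);
for (int i = 0; i < values.length; i++) {
  agg.aggregate();     // contributes only when the row's "dim" resolves to "a"
  selector.increment();
}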
Use of org.apache.druid.segment.ColumnSelectorFactory in project druid by druid-io.
The class RowBasedGrouperHelper, method createGrouperAccumulatorPair.
/**
 * Create a {@link Grouper} that groups according to the dimensions and aggregators in "query", along with
 * an {@link Accumulator} that accepts ResultRows and forwards them to the grouper.
 *
 * The pair will operate in one of two modes:
 *
 * 1) Combining mode (used if "subquery" is null). In this mode, filters from the "query" are ignored, and
 * its aggregators are converted into combining form. The input ResultRows are assumed to be partially-grouped
 * results originating from the provided "query".
 *
 * 2) Subquery mode (used if "subquery" is nonnull). In this mode, filters from the "query" (both intervals
 * and dim filters) are respected, and its aggregators are used in standard (not combining) form. The input
 * ResultRows are assumed to be results originating from the provided "subquery".
 *
 * @param query query that we are grouping for
 * @param subquery optional subquery that we are receiving results from (see combining vs. subquery mode above)
 * @param config groupBy query config
 * @param bufferSupplier supplier of merge buffers
 * @param combineBufferHolder holder of combine buffers; unused if concurrencyHint = -1, and may be null in that case
 * @param concurrencyHint -1 for a single-threaded Grouper, >= 1 for a concurrent Grouper
 * @param temporaryStorage temporary storage used for spilling from the Grouper
 * @param spillMapper object mapper used for spilling from the Grouper
 * @param grouperSorter executor service used for parallel combining; unused if concurrencyHint = -1, and may be null in that case
 * @param priority query priority
 * @param hasQueryTimeout whether this query has a timeout
 * @param queryTimeoutAt when this query times out, in milliseconds since the epoch
 * @param mergeBufferSize size of the merge buffers from "bufferSupplier"
 */
public static Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> createGrouperAccumulatorPair(
    final GroupByQuery query,
    @Nullable final GroupByQuery subquery,
    final GroupByQueryConfig config,
    final Supplier<ByteBuffer> bufferSupplier,
    @Nullable final ReferenceCountingResourceHolder<ByteBuffer> combineBufferHolder,
    final int concurrencyHint,
    final LimitedTemporaryStorage temporaryStorage,
    final ObjectMapper spillMapper,
    @Nullable final ListeningExecutorService grouperSorter,
    final int priority,
    final boolean hasQueryTimeout,
    final long queryTimeoutAt,
    final int mergeBufferSize
) {
  // concurrencyHint >= 1 for concurrent groupers, -1 for single-threaded
  Preconditions.checkArgument(concurrencyHint >= 1 || concurrencyHint == -1, "invalid concurrencyHint");
  if (concurrencyHint >= 1) {
    Preconditions.checkNotNull(grouperSorter, "grouperSorter executor must be provided");
  }

  // See method-level javadoc; we go into combining mode if there is no subquery.
  final boolean combining = subquery == null;
  final List<ColumnType> valueTypes = DimensionHandlerUtils.getValueTypesFromDimensionSpecs(query.getDimensions());
  final GroupByQueryConfig querySpecificConfig = config.withOverrides(query);
  final boolean includeTimestamp = query.getResultRowHasTimestamp();
  final ThreadLocal<ResultRow> columnSelectorRow = new ThreadLocal<>();

  ColumnSelectorFactory columnSelectorFactory = createResultRowBasedColumnSelectorFactory(
      combining ? query : subquery,
      columnSelectorRow::get,
      RowSignature.Finalization.UNKNOWN
  );

  // Apply virtual columns if we are in subquery (non-combining) mode.
  if (!combining) {
    columnSelectorFactory = query.getVirtualColumns().wrap(columnSelectorFactory);
  }

  final boolean willApplyLimitPushDown = query.isApplyLimitPushDown();
  final DefaultLimitSpec limitSpec = willApplyLimitPushDown ? (DefaultLimitSpec) query.getLimitSpec() : null;
  boolean sortHasNonGroupingFields = false;
  if (willApplyLimitPushDown) {
    sortHasNonGroupingFields = DefaultLimitSpec.sortingOrderHasNonGroupingFields(limitSpec, query.getDimensions());
  }

  final AggregatorFactory[] aggregatorFactories;
  if (combining) {
    aggregatorFactories = query.getAggregatorSpecs().stream().map(AggregatorFactory::getCombiningFactory).toArray(AggregatorFactory[]::new);
  } else {
    aggregatorFactories = query.getAggregatorSpecs().toArray(new AggregatorFactory[0]);
  }

  final Grouper.KeySerdeFactory<RowBasedKey> keySerdeFactory = new RowBasedKeySerdeFactory(
      includeTimestamp,
      query.getContextSortByDimsFirst(),
      query.getDimensions(),
      querySpecificConfig.getMaxMergingDictionarySize() / (concurrencyHint == -1 ? 1 : concurrencyHint),
      valueTypes,
      aggregatorFactories,
      limitSpec
  );

  final Grouper<RowBasedKey> grouper;
  if (concurrencyHint == -1) {
    grouper = new SpillingGrouper<>(
        bufferSupplier,
        keySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        querySpecificConfig.getBufferGrouperMaxSize(),
        querySpecificConfig.getBufferGrouperMaxLoadFactor(),
        querySpecificConfig.getBufferGrouperInitialBuckets(),
        temporaryStorage,
        spillMapper,
        true,
        limitSpec,
        sortHasNonGroupingFields,
        mergeBufferSize
    );
  } else {
    final Grouper.KeySerdeFactory<RowBasedKey> combineKeySerdeFactory = new RowBasedKeySerdeFactory(
        includeTimestamp,
        query.getContextSortByDimsFirst(),
        query.getDimensions(),
        // use the entire dictionary space for the combining key serde
        querySpecificConfig.getMaxMergingDictionarySize(),
        valueTypes,
        aggregatorFactories,
        limitSpec
    );
    grouper = new ConcurrentGrouper<>(
        querySpecificConfig,
        bufferSupplier,
        combineBufferHolder,
        keySerdeFactory,
        combineKeySerdeFactory,
        columnSelectorFactory,
        aggregatorFactories,
        temporaryStorage,
        spillMapper,
        concurrencyHint,
        limitSpec,
        sortHasNonGroupingFields,
        grouperSorter,
        priority,
        hasQueryTimeout,
        queryTimeoutAt
    );
  }

  final int keySize = includeTimestamp ? query.getDimensions().size() + 1 : query.getDimensions().size();
  final ValueExtractFunction valueExtractFn = makeValueExtractFunction(query, combining, includeTimestamp, columnSelectorFactory, valueTypes);

  final Predicate<ResultRow> rowPredicate;
  if (combining) {
    // Filters are not applied in combining mode.
    rowPredicate = row -> true;
  } else {
    rowPredicate = getResultRowPredicate(query, subquery);
  }

  final Accumulator<AggregateResult, ResultRow> accumulator = (priorResult, row) -> {
    BaseQuery.checkInterrupted();

    if (priorResult != null && !priorResult.isOk()) {
      // Pass-through error returns without doing more work.
      return priorResult;
    }

    if (!grouper.isInitialized()) {
      grouper.init();
    }

    if (!rowPredicate.test(row)) {
      return AggregateResult.ok();
    }

    columnSelectorRow.set(row);
    final Comparable[] key = new Comparable[keySize];
    valueExtractFn.apply(row, key);
    final AggregateResult aggregateResult = grouper.aggregate(new RowBasedKey(key));
    columnSelectorRow.set(null);
    return aggregateResult;
  };

  return new Pair<>(grouper, accumulator);
}
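A hedged usage sketch in single-threaded combining mode; the locals mirror the documented parameters, and "rows" stands in for a Sequence<ResultRow> of partially-grouped results.

// Hedged sketch: combining mode, so subquery is null, concurrencyHint is -1,
// and the combine buffer / grouperSorter may be null per the javadoc above.
final Pair<Grouper<RowBasedKey>, Accumulator<AggregateResult, ResultRow>> pair =
    RowBasedGrouperHelper.createGrouperAccumulatorPair(
        query,             // the GroupByQuery being grouped for
        null,              // subquery: null selects combining mode
        config,
        bufferSupplier,
        null,              // combineBufferHolder: unused when concurrencyHint == -1
        -1,                // concurrencyHint: single-threaded
        temporaryStorage,
        spillMapper,
        null,              // grouperSorter: unused when concurrencyHint == -1
        priority,
        hasQueryTimeout,
        queryTimeoutAt,
        mergeBufferSize
    );
final AggregateResult result = rows.accumulate(AggregateResult.ok(), pair.rhs);
// the caller owns the grouper's lifecycle
pair.lhs.close();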