Search in sources :

Example 6 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class DruidQuery method toTopNQuery.

/**
 * Return this query as a TopN query, or null if this query is not compatible with TopN.
 *
 * @return query or null
 */
@Nullable
private TopNQuery toTopNQuery(final QueryFeatureInspector queryFeatureInspector) {
    // Must be allowed by the QueryMaker.
    if (!queryFeatureInspector.feature(QueryFeature.CAN_RUN_TOPN)) {
        return null;
    }
    // Must have GROUP BY one column, no GROUPING SETS, ORDER BY ≤ 1 column, LIMIT > 0 and ≤ maxTopNLimit,
    // no OFFSET, no HAVING.
    final boolean topNOk = grouping != null && grouping.getDimensions().size() == 1 && !grouping.getSubtotals().hasEffect(grouping.getDimensionSpecs()) && sorting != null && (sorting.getOrderBys().size() <= 1 && sorting.getOffsetLimit().hasLimit() && sorting.getOffsetLimit().getLimit() > 0 && sorting.getOffsetLimit().getLimit() <= plannerContext.getPlannerConfig().getMaxTopNLimit() && !sorting.getOffsetLimit().hasOffset()) && grouping.getHavingFilter() == null;
    if (!topNOk) {
        return null;
    }
    final DimensionSpec dimensionSpec = Iterables.getOnlyElement(grouping.getDimensions()).toDimensionSpec();
    // grouping col cannot be type array
    if (dimensionSpec.getOutputType().isArray()) {
        return null;
    }
    final OrderByColumnSpec limitColumn;
    if (sorting.getOrderBys().isEmpty()) {
        limitColumn = new OrderByColumnSpec(dimensionSpec.getOutputName(), OrderByColumnSpec.Direction.ASCENDING, Calcites.getStringComparatorForValueType(dimensionSpec.getOutputType()));
    } else {
        limitColumn = Iterables.getOnlyElement(sorting.getOrderBys());
    }
    final TopNMetricSpec topNMetricSpec;
    if (limitColumn.getDimension().equals(dimensionSpec.getOutputName())) {
        // DimensionTopNMetricSpec is exact; always return it even if allowApproximate is false.
        final DimensionTopNMetricSpec baseMetricSpec = new DimensionTopNMetricSpec(null, limitColumn.getDimensionComparator());
        topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? baseMetricSpec : new InvertedTopNMetricSpec(baseMetricSpec);
    } else if (plannerContext.getPlannerConfig().isUseApproximateTopN()) {
        // ORDER BY metric
        final NumericTopNMetricSpec baseMetricSpec = new NumericTopNMetricSpec(limitColumn.getDimension());
        topNMetricSpec = limitColumn.getDirection() == OrderByColumnSpec.Direction.ASCENDING ? new InvertedTopNMetricSpec(baseMetricSpec) : baseMetricSpec;
    } else {
        return null;
    }
    final Pair<DataSource, Filtration> dataSourceFiltrationPair = getFiltration(dataSource, filter, virtualColumnRegistry);
    final DataSource newDataSource = dataSourceFiltrationPair.lhs;
    final Filtration filtration = dataSourceFiltrationPair.rhs;
    final List<PostAggregator> postAggregators = new ArrayList<>(grouping.getPostAggregators());
    if (sorting.getProjection() != null) {
        postAggregators.addAll(sorting.getProjection().getPostAggregators());
    }
    return new TopNQuery(newDataSource, getVirtualColumns(true), dimensionSpec, topNMetricSpec, Ints.checkedCast(sorting.getOffsetLimit().getLimit()), filtration.getQuerySegmentSpec(), filtration.getDimFilter(), Granularities.ALL, grouping.getAggregatorFactories(), postAggregators, ImmutableSortedMap.copyOf(plannerContext.getQueryContext()));
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) Filtration(org.apache.druid.sql.calcite.filtration.Filtration) PostAggregator(org.apache.druid.query.aggregation.PostAggregator) IntArrayList(it.unimi.dsi.fastutil.ints.IntArrayList) ArrayList(java.util.ArrayList) InvertedTopNMetricSpec(org.apache.druid.query.topn.InvertedTopNMetricSpec) TopNMetricSpec(org.apache.druid.query.topn.TopNMetricSpec) NumericTopNMetricSpec(org.apache.druid.query.topn.NumericTopNMetricSpec) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) DataSource(org.apache.druid.query.DataSource) QueryDataSource(org.apache.druid.query.QueryDataSource) JoinDataSource(org.apache.druid.query.JoinDataSource) OrderByColumnSpec(org.apache.druid.query.groupby.orderby.OrderByColumnSpec) DimensionTopNMetricSpec(org.apache.druid.query.topn.DimensionTopNMetricSpec) InvertedTopNMetricSpec(org.apache.druid.query.topn.InvertedTopNMetricSpec) NumericTopNMetricSpec(org.apache.druid.query.topn.NumericTopNMetricSpec) TopNQuery(org.apache.druid.query.topn.TopNQuery) Nullable(javax.annotation.Nullable)

Example 7 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class CardinalityAggregatorBenchmark method setUp.

@Override
protected void setUp() {
    Iterable<String[]> values = FluentIterable.from(ContiguousSet.create(Range.closedOpen(0, 500), DiscreteDomain.integers())).transform(new Function<Integer, String[]>() {

        @Override
        public String[] apply(Integer input) {
            if (multivaluedSized == 1) {
                return new String[] { input.toString() };
            } else {
                String[] res = new String[multivaluedSized];
                String value = input.toString();
                for (int i = 0; i < multivaluedSized; ++i) {
                    res[i] = value + i;
                }
                return res;
            }
        }
    }).cycle().limit(MAX);
    final DimensionSpec dimSpec1 = new DefaultDimensionSpec("dim1", "dim1");
    final CardinalityAggregatorTest.TestDimensionSelector dim1 = new CardinalityAggregatorTest.TestDimensionSelector(values, null);
    final ColumnSelectorPlus<CardinalityAggregatorColumnSelectorStrategy> dimInfo1 = new ColumnSelectorPlus(dimSpec1.getDimension(), dimSpec1.getOutputName(), new StringCardinalityAggregatorColumnSelectorStrategy(), dim1);
    selectorList = Collections.singletonList((DimensionSelector) dim1);
    dimInfos = new ColumnSelectorPlus[] { dimInfo1 };
    agg = new CardinalityBufferAggregator(dimInfos, byRow);
    CardinalityAggregatorFactory factory = new CardinalityAggregatorFactory("billy", Collections.singletonList(new DefaultDimensionSpec("dim1", "dim1")), byRow);
    int maxSize = factory.getMaxIntermediateSizeWithNulls();
    buf = ByteBuffer.allocate(maxSize + 64);
    pos = 10;
    buf.limit(pos + maxSize);
    agg.init(buf, pos);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnSelectorPlus(org.apache.druid.query.ColumnSelectorPlus) DimensionSelector(org.apache.druid.segment.DimensionSelector) CardinalityAggregatorColumnSelectorStrategy(org.apache.druid.query.aggregation.cardinality.types.CardinalityAggregatorColumnSelectorStrategy) StringCardinalityAggregatorColumnSelectorStrategy(org.apache.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorColumnSelectorStrategy) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) StringCardinalityAggregatorColumnSelectorStrategy(org.apache.druid.query.aggregation.cardinality.types.StringCardinalityAggregatorColumnSelectorStrategy)

Example 8 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class ExpressionVirtualColumnTest method testMultiObjectSelectorMakesRightSelector.

@Test
public void testMultiObjectSelectorMakesRightSelector() {
    DimensionSpec spec = new DefaultDimensionSpec("expr", "expr");
    // do some ugly faking to test if SingleStringInputDeferredEvaluationExpressionDimensionSelector is created for multi-value expressions when possible
    ColumnSelectorFactory factory = new ColumnSelectorFactory() {

        @Override
        public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
            DimensionSelector delegate = COLUMN_SELECTOR_FACTORY.makeDimensionSelector(dimensionSpec);
            DimensionSelector faker = new DimensionSelector() {

                @Override
                public IndexedInts getRow() {
                    return delegate.getRow();
                }

                @Override
                public ValueMatcher makeValueMatcher(@Nullable String value) {
                    return delegate.makeValueMatcher(value);
                }

                @Override
                public ValueMatcher makeValueMatcher(Predicate<String> predicate) {
                    return delegate.makeValueMatcher(predicate);
                }

                @Override
                public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                    delegate.inspectRuntimeShape(inspector);
                }

                @Nullable
                @Override
                public Object getObject() {
                    return delegate.getObject();
                }

                @Override
                public Class<?> classOfObject() {
                    return delegate.classOfObject();
                }

                @Override
                public int getValueCardinality() {
                    // value doesn't matter as long as not CARDINALITY_UNKNOWN
                    return 3;
                }

                @Nullable
                @Override
                public String lookupName(int id) {
                    return null;
                }

                @Override
                public boolean nameLookupPossibleInAdvance() {
                    // fake this so when SingleStringInputDeferredEvaluationExpressionDimensionSelector it doesn't explode
                    return true;
                }

                @Nullable
                @Override
                public IdLookup idLookup() {
                    return name -> 0;
                }
            };
            return faker;
        }

        @Override
        public ColumnValueSelector makeColumnValueSelector(String columnName) {
            return COLUMN_SELECTOR_FACTORY.makeColumnValueSelector(columnName);
        }

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String column) {
            return new ColumnCapabilitiesImpl().setType(ColumnType.STRING).setHasMultipleValues(true).setDictionaryEncoded(true);
        }
    };
    final BaseObjectColumnValueSelector selectorImplicit = SCALE_LIST_SELF_IMPLICIT.makeDimensionSelector(spec, factory);
    final BaseObjectColumnValueSelector selectorExplicit = SCALE_LIST_SELF_EXPLICIT.makeDimensionSelector(spec, factory);
    Assert.assertTrue(selectorImplicit instanceof SingleStringInputDeferredEvaluationExpressionDimensionSelector);
    Assert.assertTrue(selectorExplicit instanceof ExpressionMultiValueDimensionSelector);
}
Also used : Arrays(java.util.Arrays) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) ConstantMultiValueDimensionSelector(org.apache.druid.segment.ConstantMultiValueDimensionSelector) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) RowAdapters(org.apache.druid.segment.RowAdapters) Parser(org.apache.druid.math.expr.Parser) IdLookup(org.apache.druid.segment.IdLookup) IndexedInts(org.apache.druid.segment.data.IndexedInts) BaseFloatColumnValueSelector(org.apache.druid.segment.BaseFloatColumnValueSelector) Row(org.apache.druid.data.input.Row) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) ImmutableList(com.google.common.collect.ImmutableList) Predicates(com.google.common.base.Predicates) DimensionSelector(org.apache.druid.segment.DimensionSelector) BucketExtractionFn(org.apache.druid.query.extraction.BucketExtractionFn) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) Nullable(javax.annotation.Nullable) ValueMatcher(org.apache.druid.query.filter.ValueMatcher) DateTimes(org.apache.druid.java.util.common.DateTimes) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ImmutableMap(com.google.common.collect.ImmutableMap) ValueType(org.apache.druid.segment.column.ValueType) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) ExprEval(org.apache.druid.math.expr.ExprEval) InputRow(org.apache.druid.data.input.InputRow) BaseLongColumnValueSelector(org.apache.druid.segment.BaseLongColumnValueSelector) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) Predicate(com.google.common.base.Predicate) NullHandling(org.apache.druid.common.config.NullHandling) RowSignature(org.apache.druid.segment.column.RowSignature) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) ColumnType(org.apache.druid.segment.column.ColumnType) Assert(org.junit.Assert) ConstantDimensionSelector(org.apache.druid.segment.ConstantDimensionSelector) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) ExtractionDimensionSpec(org.apache.druid.query.dimension.ExtractionDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ConstantMultiValueDimensionSelector(org.apache.druid.segment.ConstantMultiValueDimensionSelector) DimensionSelector(org.apache.druid.segment.DimensionSelector) ConstantDimensionSelector(org.apache.druid.segment.ConstantDimensionSelector) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) BaseObjectColumnValueSelector(org.apache.druid.segment.BaseObjectColumnValueSelector) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) Predicate(com.google.common.base.Predicate) Nullable(javax.annotation.Nullable) ColumnCapabilitiesImpl(org.apache.druid.segment.column.ColumnCapabilitiesImpl) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 9 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class BuiltinApproxCountDistinctSqlAggregator method toDruidAggregation.

@Nullable
@Override
public Aggregation toDruidAggregation(final PlannerContext plannerContext, final RowSignature rowSignature, final VirtualColumnRegistry virtualColumnRegistry, final RexBuilder rexBuilder, final String name, final AggregateCall aggregateCall, final Project project, final List<Aggregation> existingAggregations, final boolean finalizeAggregations) {
    // Don't use Aggregations.getArgumentsForSimpleAggregator, since it won't let us use direct column access
    // for string columns.
    final RexNode rexNode = Expressions.fromFieldAccess(rowSignature, project, Iterables.getOnlyElement(aggregateCall.getArgList()));
    final DruidExpression arg = Expressions.toDruidExpression(plannerContext, rowSignature, rexNode);
    if (arg == null) {
        return null;
    }
    final AggregatorFactory aggregatorFactory;
    final String aggregatorName = finalizeAggregations ? Calcites.makePrefixedName(name, "a") : name;
    if (arg.isDirectColumnAccess() && rowSignature.getColumnType(arg.getDirectColumn()).map(type -> type.is(ValueType.COMPLEX)).orElse(false)) {
        aggregatorFactory = new HyperUniquesAggregatorFactory(aggregatorName, arg.getDirectColumn(), false, true);
    } else {
        final RelDataType dataType = rexNode.getType();
        final ColumnType inputType = Calcites.getColumnTypeForRelDataType(dataType);
        if (inputType == null) {
            throw new ISE("Cannot translate sqlTypeName[%s] to Druid type for field[%s]", dataType.getSqlTypeName(), aggregatorName);
        }
        final DimensionSpec dimensionSpec;
        if (arg.isSimpleExtraction()) {
            dimensionSpec = arg.getSimpleExtraction().toDimensionSpec(null, inputType);
        } else {
            String virtualColumnName = virtualColumnRegistry.getOrCreateVirtualColumnForExpression(arg, dataType);
            dimensionSpec = new DefaultDimensionSpec(virtualColumnName, null, inputType);
        }
        aggregatorFactory = new CardinalityAggregatorFactory(aggregatorName, null, ImmutableList.of(dimensionSpec), false, true);
    }
    return Aggregation.create(Collections.singletonList(aggregatorFactory), finalizeAggregations ? new HyperUniqueFinalizingPostAggregator(name, aggregatorFactory.getName()) : null);
}
Also used : DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ColumnType(org.apache.druid.segment.column.ColumnType) DruidExpression(org.apache.druid.sql.calcite.expression.DruidExpression) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) RelDataType(org.apache.calcite.rel.type.RelDataType) ISE(org.apache.druid.java.util.common.ISE) HyperUniqueFinalizingPostAggregator(org.apache.druid.query.aggregation.hyperloglog.HyperUniqueFinalizingPostAggregator) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) HyperUniquesAggregatorFactory(org.apache.druid.query.aggregation.hyperloglog.HyperUniquesAggregatorFactory) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) CardinalityAggregatorFactory(org.apache.druid.query.aggregation.cardinality.CardinalityAggregatorFactory) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) RexNode(org.apache.calcite.rex.RexNode) Nullable(javax.annotation.Nullable)

Example 10 with DimensionSpec

use of org.apache.druid.query.dimension.DimensionSpec in project druid by druid-io.

the class IncrementalIndex method makeColumnSelectorFactory.

/**
 * Column selector used at ingestion time for inputs to aggregators.
 *
 * @param agg                       the aggregator
 * @param in                        ingestion-time input row supplier
 * @param deserializeComplexMetrics whether complex objects should be deserialized by a {@link ComplexMetricExtractor}
 *
 * @return column selector factory
 */
public static ColumnSelectorFactory makeColumnSelectorFactory(final VirtualColumns virtualColumns, final AggregatorFactory agg, final Supplier<InputRow> in, final boolean deserializeComplexMetrics) {
    // we use RowSignature.empty() because ColumnInspector here should be the InputRow schema, not the
    // IncrementalIndex schema, because we are reading values from the InputRow
    final RowBasedColumnSelectorFactory<InputRow> baseSelectorFactory = RowBasedColumnSelectorFactory.create(RowAdapters.standardRow(), in::get, RowSignature.empty(), true);
    class IncrementalIndexInputRowColumnSelectorFactory implements ColumnSelectorFactory {

        @Override
        public ColumnValueSelector<?> makeColumnValueSelector(final String column) {
            final boolean isComplexMetric = agg.getIntermediateType().is(ValueType.COMPLEX);
            final ColumnValueSelector selector = baseSelectorFactory.makeColumnValueSelector(column);
            if (!isComplexMetric || !deserializeComplexMetrics) {
                return selector;
            } else {
                // Wrap selector in a special one that uses ComplexMetricSerde to modify incoming objects.
                // For complex aggregators that read from multiple columns, we wrap all of them. This is not ideal but it
                // has worked so far.
                final String complexTypeName = agg.getIntermediateType().getComplexTypeName();
                final ComplexMetricSerde serde = ComplexMetrics.getSerdeForType(complexTypeName);
                if (serde == null) {
                    throw new ISE("Don't know how to handle type[%s]", complexTypeName);
                }
                final ComplexMetricExtractor extractor = serde.getExtractor();
                return new ColumnValueSelector() {

                    @Override
                    public boolean isNull() {
                        return selector.isNull();
                    }

                    @Override
                    public long getLong() {
                        return selector.getLong();
                    }

                    @Override
                    public float getFloat() {
                        return selector.getFloat();
                    }

                    @Override
                    public double getDouble() {
                        return selector.getDouble();
                    }

                    @Override
                    public Class classOfObject() {
                        return extractor.extractedClass();
                    }

                    @Nullable
                    @Override
                    public Object getObject() {
                        // Here is where the magic happens: read from "in" directly, don't go through the normal "selector".
                        return extractor.extractValue(in.get(), column, agg);
                    }

                    @Override
                    public void inspectRuntimeShape(RuntimeShapeInspector inspector) {
                        inspector.visit("in", in);
                        inspector.visit("selector", selector);
                        inspector.visit("extractor", extractor);
                    }
                };
            }
        }

        @Override
        public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
            return baseSelectorFactory.makeDimensionSelector(dimensionSpec);
        }

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String columnName) {
            return baseSelectorFactory.getColumnCapabilities(columnName);
        }
    }
    return virtualColumns.wrap(new IncrementalIndexInputRowColumnSelectorFactory());
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) ComplexMetricSerde(org.apache.druid.segment.serde.ComplexMetricSerde) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) RowBasedColumnSelectorFactory(org.apache.druid.segment.RowBasedColumnSelectorFactory) ComplexMetricExtractor(org.apache.druid.segment.serde.ComplexMetricExtractor) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) InputRow(org.apache.druid.data.input.InputRow) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) ISE(org.apache.druid.java.util.common.ISE) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) NilColumnValueSelector(org.apache.druid.segment.NilColumnValueSelector)

Aggregations

DimensionSpec (org.apache.druid.query.dimension.DimensionSpec)53 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)27 AggregatorFactory (org.apache.druid.query.aggregation.AggregatorFactory)20 ArrayList (java.util.ArrayList)19 HashMap (java.util.HashMap)16 Nullable (javax.annotation.Nullable)15 Test (org.junit.Test)15 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)14 MapBasedRow (org.apache.druid.data.input.MapBasedRow)12 Row (org.apache.druid.data.input.Row)12 ISE (org.apache.druid.java.util.common.ISE)12 PostAggregator (org.apache.druid.query.aggregation.PostAggregator)11 Map (java.util.Map)10 ColumnType (org.apache.druid.segment.column.ColumnType)10 List (java.util.List)9 LongSumAggregatorFactory (org.apache.druid.query.aggregation.LongSumAggregatorFactory)9 LongMeanAveragerFactory (org.apache.druid.query.movingaverage.averagers.LongMeanAveragerFactory)9 HashSet (java.util.HashSet)8 Function (com.google.common.base.Function)7 ImmutableList (com.google.common.collect.ImmutableList)7