Search in sources :

Example 56 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class StringTopNColumnAggregatesProcessor method scanAndAggregateWithCardinalityKnown.

/**
 * Drains {@code cursor}, feeding every dimension value of every row into its aggregators.
 *
 * For each value id in the current row, the aggregator array is looked up in {@code rowSelector}
 * (indexed by value id). On a miss, the id is resolved to its name via {@code selector.lookupName},
 * converted with {@code dimensionValueConverter}, and the aggregators are fetched from (or created in)
 * {@code aggregatesStore}; the result is then cached back into {@code rowSelector}.
 *
 * @param query       query whose aggregator specs are used when new aggregators must be created
 * @param cursor      cursor to iterate until {@code isDone()}; also handed to the aggregator factory call
 * @param selector    supplies the value ids of the current row and resolves ids to names
 * @param rowSelector per-value-id cache of aggregator arrays; filled in lazily by this method
 * @return the number of rows the cursor was advanced over
 */
private long scanAndAggregateWithCardinalityKnown(TopNQuery query, Cursor cursor, DimensionSelector selector, Aggregator[][] rowSelector) {
    long rowsSeen = 0;
    for (; !cursor.isDone(); rowsSeen++) {
        final IndexedInts currentRow = selector.getRow();
        final int valueCount = currentRow.size();
        int position = 0;
        while (position < valueCount) {
            final int valueId = currentRow.get(position);
            Aggregator[] valueAggregators = rowSelector[valueId];
            if (valueAggregators == null) {
                // First time this id is seen: resolve it to a key and share aggregators through the store.
                final String valueName = selector.lookupName(valueId);
                final Comparable<?> storeKey = dimensionValueConverter.apply(valueName);
                valueAggregators = aggregatesStore.computeIfAbsent(storeKey, unused -> BaseTopNAlgorithm.makeAggregators(cursor, query.getAggregatorSpecs()));
                rowSelector[valueId] = valueAggregators;
            }
            for (int a = 0; a < valueAggregators.length; a++) {
                valueAggregators[a].aggregate();
            }
            ++position;
        }
        cursor.advance();
    }
    return rowsSeen;
}
Also used : DimensionHandlerUtils(org.apache.druid.segment.DimensionHandlerUtils) HashMap(java.util.HashMap) Aggregator(org.apache.druid.query.aggregation.Aggregator) IndexedInts(org.apache.druid.segment.data.IndexedInts) StorageAdapter(org.apache.druid.segment.StorageAdapter) Function(java.util.function.Function) TopNQuery(org.apache.druid.query.topn.TopNQuery) DimensionDictionarySelector(org.apache.druid.segment.DimensionDictionarySelector) Cursor(org.apache.druid.segment.Cursor) BaseTopNAlgorithm(org.apache.druid.query.topn.BaseTopNAlgorithm) Map(java.util.Map) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) ColumnType(org.apache.druid.segment.column.ColumnType) DimensionSelector(org.apache.druid.segment.DimensionSelector) TopNResultBuilder(org.apache.druid.query.topn.TopNResultBuilder) TopNParams(org.apache.druid.query.topn.TopNParams) IndexedInts(org.apache.druid.segment.data.IndexedInts) Aggregator(org.apache.druid.query.aggregation.Aggregator)

Example 57 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class HashJoinEngine method makeJoinCursor.

/**
 * Creates a cursor that represents the join of {@code leftCursor} with {@code joinableClause}. The resulting
 * cursor may generate nulls on the left-hand side (for righty joins; see {@link JoinType#isRighty()}) or on
 * the right-hand side (for lefty joins; see {@link JoinType#isLefty()}). Columns that start with the
 * joinable clause's prefix (see {@link JoinableClause#getPrefix()}) will come from the Joinable's column selector
 * factory, and all other columns will come from the leftCursor's column selector factory.
 *
 * Ensuring that the joinable clause's prefix does not conflict with any columns from "leftCursor" is the
 * responsibility of the caller. If there is such a conflict (for example, if the joinable clause's prefix is "j.",
 * and the leftCursor has a field named "j.j.abrams"), then the field from the leftCursor will be shadowed and will
 * not be queryable through the returned Cursor. This happens even if the right-hand joinable doesn't actually have a
 * column with this name.
 *
 * The returned cursor has already been initialized: the join matcher has been run against the left cursor's
 * starting position and, for joins that are not "lefty", the cursor has been advanced to the first row with a match
 * (or to the end).
 *
 * @param leftCursor     cursor providing the left-hand side rows and columns
 * @param joinableClause clause supplying the right-hand Joinable, the join condition, the join type and the prefix
 * @param descending     forwarded unchanged to {@code makeJoinMatcher}
 * @param closer         forwarded unchanged to {@code makeJoinMatcher}
 * @return a cursor over the joined rows
 */
public static Cursor makeJoinCursor(final Cursor leftCursor, final JoinableClause joinableClause, final boolean descending, final Closer closer) {
    final ColumnSelectorFactory leftColumnSelectorFactory = leftCursor.getColumnSelectorFactory();
    // The matcher reads left-hand values through the left factory; the third argument tells it whether
    // this is a "righty" join.
    final JoinMatcher joinMatcher = joinableClause.getJoinable().makeJoinMatcher(leftColumnSelectorFactory, joinableClause.getCondition(), joinableClause.getJoinType().isRighty(), descending, closer);
    // Routes each column request either to the right-hand side (columns the clause includes, with the
    // prefix stripped) or to the left-hand side. For righty joins, left-hand selectors are wrapped in
    // PossiblyNull* selectors driven by joinMatcher::matchingRemainder.
    class JoinColumnSelectorFactory implements ColumnSelectorFactory {

        @Override
        public DimensionSelector makeDimensionSelector(DimensionSpec dimensionSpec) {
            if (joinableClause.includesColumn(dimensionSpec.getDimension())) {
                // Right-hand column: ask the matcher's factory for the unprefixed dimension.
                return joinMatcher.getColumnSelectorFactory().makeDimensionSelector(dimensionSpec.withDimension(joinableClause.unprefix(dimensionSpec.getDimension())));
            } else {
                final DimensionSelector leftSelector = leftColumnSelectorFactory.makeDimensionSelector(dimensionSpec);
                if (!joinableClause.getJoinType().isRighty()) {
                    return leftSelector;
                } else {
                    return new PossiblyNullDimensionSelector(leftSelector, joinMatcher::matchingRemainder);
                }
            }
        }

        @Override
        public ColumnValueSelector makeColumnValueSelector(String column) {
            if (joinableClause.includesColumn(column)) {
                // Right-hand column: ask the matcher's factory for the unprefixed column.
                return joinMatcher.getColumnSelectorFactory().makeColumnValueSelector(joinableClause.unprefix(column));
            } else {
                final ColumnValueSelector<?> leftSelector = leftColumnSelectorFactory.makeColumnValueSelector(column);
                if (!joinableClause.getJoinType().isRighty()) {
                    return leftSelector;
                } else {
                    return new PossiblyNullColumnValueSelector<>(leftSelector, joinMatcher::matchingRemainder);
                }
            }
        }

        @Nullable
        @Override
        public ColumnCapabilities getColumnCapabilities(String column) {
            if (joinableClause.includesColumn(column)) {
                return joinMatcher.getColumnSelectorFactory().getColumnCapabilities(joinableClause.unprefix(column));
            } else {
                return leftColumnSelectorFactory.getColumnCapabilities(column);
            }
        }
    }
    final JoinColumnSelectorFactory joinColumnSelectorFactory = new JoinColumnSelectorFactory();
    // Cursor over joined rows. It is done only when the left cursor is done AND the matcher has no
    // current match.
    class JoinCursor implements Cursor {

        // Primes the matcher for the left cursor's starting position. For joins that are not "lefty",
        // keeps advancing until a match is found or the cursor is done.
        public void initialize() {
            matchCurrentPosition();
            if (!joinableClause.getJoinType().isLefty()) {
                while (!joinMatcher.hasMatch() && !isDone()) {
                    advance();
                    matchCurrentPosition();
                }
            }
        }

        @Override
        @Nonnull
        public ColumnSelectorFactory getColumnSelectorFactory() {
            return joinColumnSelectorFactory;
        }

        @Override
        @Nonnull
        public DateTime getTime() {
            return leftCursor.getTime();
        }

        // Same as advanceUninterruptibly(), followed by an interruption check.
        @Override
        public void advance() {
            advanceUninterruptibly();
            BaseQuery.checkInterrupted();
        }

        // Brings the matcher in line with the left cursor: matches the join condition while the left
        // cursor still has rows; once it is done, starts remainder matching for righty joins (only if
        // the matcher is not already matching the remainder).
        private void matchCurrentPosition() {
            if (leftCursor.isDone()) {
                if (joinableClause.getJoinType().isRighty() && !joinMatcher.matchingRemainder()) {
                    // Warning! The way this engine handles "righty" joins is flawed: it generates the 'remainder' rows
                    // per-segment, but this should really be done globally. This should be improved in the future.
                    joinMatcher.matchRemainder();
                }
            } else {
                joinMatcher.matchCondition();
            }
        }

        @Override
        public void advanceUninterruptibly() {
            // First try to step to the next right-hand match for the current position; if one exists,
            // the left cursor stays where it is.
            if (joinMatcher.hasMatch()) {
                joinMatcher.nextMatch();
                if (joinMatcher.hasMatch()) {
                    return;
                }
            }
            assert !joinMatcher.hasMatch();
            if (leftCursor.isDone()) {
                // No right-hand matches and nothing on the left cursor. We're done; return.
                assert isDone();
                return;
            }
            do {
                // No more right-hand side matches; advance the left-hand side.
                leftCursor.advanceUninterruptibly();
                // Update joinMatcher state to match new cursor position.
                matchCurrentPosition();
            // If this is not a left/full join, and joinMatcher didn't match anything, then keep advancing until we find
            // left rows that have matching right rows.
            } while (!joinableClause.getJoinType().isLefty() && !joinMatcher.hasMatch() && !leftCursor.isDone());
        }

        @Override
        public boolean isDone() {
            return leftCursor.isDone() && !joinMatcher.hasMatch();
        }

        @Override
        public boolean isDoneOrInterrupted() {
            return isDone() || Thread.currentThread().isInterrupted();
        }

        // Resets the left cursor and the matcher.
        // NOTE(review): unlike initialize(), this does not call matchCurrentPosition(), so the matcher
        // does not appear to be re-primed for the first left row after a reset — confirm whether
        // callers rely on reset() and whether JoinMatcher.reset() covers this.
        @Override
        public void reset() {
            leftCursor.reset();
            joinMatcher.reset();
        }
    }
    final JoinCursor joinCursor = new JoinCursor();
    // Initialize before handing the cursor out, so it is positioned on its first joined row (or done).
    joinCursor.initialize();
    return joinCursor;
}
Also used : DimensionSpec(org.apache.druid.query.dimension.DimensionSpec) DimensionSelector(org.apache.druid.segment.DimensionSelector) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) Cursor(org.apache.druid.segment.Cursor)

Example 58 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class ListFilteredDimensionSpecTest method testDecoratorWithBlacklistUsingNonPresentValues.

/**
 * Decorates the shared test selector with a list-filtered spec built from the values "c" and "gx"
 * (third constructor argument {@code false}; per the test name this is blacklist mode, and "gx" is the
 * value not present in the selector). Checks the resulting cardinality, the ids and names of the current
 * row, and the name/id lookups at both ends of the dictionary.
 */
@Test
public void testDecoratorWithBlacklistUsingNonPresentValues() {
    final DefaultDimensionSpec delegateSpec = new DefaultDimensionSpec("foo", "bar");
    final ListFilteredDimensionSpec blacklistSpec = new ListFilteredDimensionSpec(delegateSpec, ImmutableSet.of("c", "gx"), false);
    final DimensionSelector decorated = blacklistSpec.decorate(TestDimensionSelector.INSTANCE);

    // Dictionary size after decoration.
    Assert.assertEquals(25, decorated.getValueCardinality());

    // Current row: two values, ids 3 and 5, resolving to "e" and "g".
    final IndexedInts currentRow = decorated.getRow();
    Assert.assertEquals(2, currentRow.size());
    Assert.assertEquals(3, currentRow.get(0));
    Assert.assertEquals(5, currentRow.get(1));
    Assert.assertEquals("e", decorated.lookupName(currentRow.get(0)));
    Assert.assertEquals("g", decorated.lookupName(currentRow.get(1)));

    // First and last dictionary entries, in both lookup directions.
    Assert.assertEquals("a", decorated.lookupName(0));
    Assert.assertEquals("z", decorated.lookupName(24));
    Assert.assertEquals(0, decorated.idLookup().lookupId("a"));
    Assert.assertEquals(24, decorated.idLookup().lookupId("z"));
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) IndexedInts(org.apache.druid.segment.data.IndexedInts) Test(org.junit.Test)

Example 59 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class RegexFilteredDimensionSpecTest method testDecorator.

/**
 * Decorates the shared test selector with a regex-filtered spec using the pattern {@code [c,g]} and
 * checks that the decorated selector reports cardinality 2, that the current row holds ids 0 and 1,
 * and that those ids map to "c" and "g" in both lookup directions.
 */
@Test
public void testDecorator() {
    final DefaultDimensionSpec delegateSpec = new DefaultDimensionSpec("foo", "bar");
    final RegexFilteredDimensionSpec regexSpec = new RegexFilteredDimensionSpec(delegateSpec, "[c,g]");
    final DimensionSelector decorated = regexSpec.decorate(TestDimensionSelector.INSTANCE);

    // Dictionary size after decoration.
    Assert.assertEquals(2, decorated.getValueCardinality());

    // Current row: two values with ids 0 and 1.
    final IndexedInts currentRow = decorated.getRow();
    Assert.assertEquals(2, currentRow.size());
    Assert.assertEquals(0, currentRow.get(0));
    Assert.assertEquals(1, currentRow.get(1));

    // id -> name, then name -> id.
    Assert.assertEquals("c", decorated.lookupName(0));
    Assert.assertEquals("g", decorated.lookupName(1));
    Assert.assertEquals(0, decorated.idLookup().lookupId("c"));
    Assert.assertEquals(1, decorated.idLookup().lookupId("g"));
}
Also used : DimensionSelector(org.apache.druid.segment.DimensionSelector) IndexedInts(org.apache.druid.segment.data.IndexedInts) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Example 60 with DimensionSelector

use of org.apache.druid.segment.DimensionSelector in project druid by druid-io.

the class ExpressionSelectorsTest method test_incrementalIndexStringSelector.

/**
 * Regression test: ColumnCapabilities.isDictionaryEncoded did not match the value of
 * DimensionSelector.nameLookupPossibleInAdvance in the indexers of an IncrementalIndex, which resulted in an
 * exception when trying to make an optimized string expression selector that was not appropriate for the
 * underlying dimension selector.
 *
 * This occurred during schemaless ingestion with sparse dimension values and no explicit null rows, so those
 * conditions are replicated here: two rows are added, the first with only dimension "x" and the second with
 * only dimension "y". See https://github.com/apache/druid/pull/10248 for details.
 */
@Test
public void test_incrementalIndexStringSelector() throws IndexSizeExceededException {
    // Schema with no declared dimensions and a single count metric.
    final TimestampSpec timestampSpec = new TimestampSpec("time", "millis", DateTimes.nowUtc());
    final AggregatorFactory[] metrics = new AggregatorFactory[] { new CountAggregatorFactory("count") };
    final IncrementalIndexSchema indexSchema = new IncrementalIndexSchema(0, timestampSpec, Granularities.NONE, VirtualColumns.EMPTY, DimensionsSpec.EMPTY, metrics, true);
    final IncrementalIndex incrementalIndex = new OnheapIncrementalIndex.Builder().setMaxRowCount(100).setIndexSchema(indexSchema).build();

    // Row 1 carries only "x"; row 2 (one second later) carries only "y".
    incrementalIndex.add(new MapBasedInputRow(DateTimes.nowUtc().getMillis(), ImmutableList.of("x"), ImmutableMap.of("x", "foo")));
    incrementalIndex.add(new MapBasedInputRow(DateTimes.nowUtc().plusMillis(1000).getMillis(), ImmutableList.of("y"), ImmutableMap.of("y", "foo")));

    final IncrementalIndexStorageAdapter storageAdapter = new IncrementalIndexStorageAdapter(incrementalIndex);
    final Sequence<Cursor> cursorSequence = storageAdapter.makeCursors(null, Intervals.ETERNITY, VirtualColumns.EMPTY, Granularities.ALL, false, null);

    final int totalRows = cursorSequence.map(cursor -> {
        final DimensionSelector concatX = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(x, 'foo')", ExprMacroTable.nil()), null);
        final DimensionSelector concatY = ExpressionSelectors.makeDimensionSelector(cursor.getColumnSelectorFactory(), Parser.parse("concat(y, 'foo')", ExprMacroTable.nil()), null);
        int seen = 0;
        for (; !cursor.isDone(); cursor.advance()) {
            final Object actualX = concatX.getObject();
            final Object actualY = concatY.getObject();
            final List<String> whenPresent = Collections.singletonList("foofoo");
            final List<String> whenMissing = NullHandling.replaceWithDefault() ? Collections.singletonList("foo") : Collections.singletonList(null);
            // Only the first row has "x"; every later row is expected to have "y" instead.
            final boolean firstRow = seen == 0;
            Assert.assertEquals(firstRow ? whenPresent : whenMissing, actualX);
            Assert.assertEquals(firstRow ? whenMissing : whenPresent, actualY);
            seen++;
        }
        return seen;
    }).accumulate(0, (left, right) -> left + right);

    Assert.assertEquals(2, totalRows);
}
Also used : SegmentGenerator(org.apache.druid.segment.generator.SegmentGenerator) ColumnValueSelector(org.apache.druid.segment.ColumnValueSelector) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) StorageAdapter(org.apache.druid.segment.StorageAdapter) DefaultDimensionSpec(org.apache.druid.query.dimension.DefaultDimensionSpec) GeneratorBasicSchemas(org.apache.druid.segment.generator.GeneratorBasicSchemas) ColumnSelectorFactory(org.apache.druid.segment.ColumnSelectorFactory) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) Expr(org.apache.druid.math.expr.Expr) DateTimes(org.apache.druid.java.util.common.DateTimes) Sequence(org.apache.druid.java.util.common.guava.Sequence) AfterClass(org.junit.AfterClass) ImmutableMap(com.google.common.collect.ImmutableMap) Closer(org.apache.druid.java.util.common.io.Closer) AggregatorFactory(org.apache.druid.query.aggregation.AggregatorFactory) QueryableIndex(org.apache.druid.segment.QueryableIndex) TestExprMacroTable(org.apache.druid.query.expression.TestExprMacroTable) ExprEval(org.apache.druid.math.expr.ExprEval) BaseSingleValueDimensionSelector(org.apache.druid.segment.BaseSingleValueDimensionSelector) TestObjectColumnSelector(org.apache.druid.segment.TestObjectColumnSelector) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) ExprMacroTable(org.apache.druid.math.expr.ExprMacroTable) IndexSizeExceededException(org.apache.druid.segment.incremental.IndexSizeExceededException) List(java.util.List) LinearShardSpec(org.apache.druid.timeline.partition.LinearShardSpec) DataSegment(org.apache.druid.timeline.DataSegment) ColumnCapabilities(org.apache.druid.segment.column.ColumnCapabilities) BeforeClass(org.junit.BeforeClass) Intervals(org.apache.druid.java.util.common.Intervals) RuntimeShapeInspector(org.apache.druid.query.monomorphicprocessing.RuntimeShapeInspector) Supplier(com.google.common.base.Supplier) 
MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) Parser(org.apache.druid.math.expr.Parser) ArrayList(java.util.ArrayList) ImmutableList(com.google.common.collect.ImmutableList) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) SettableSupplier(org.apache.druid.common.guava.SettableSupplier) DimensionSelector(org.apache.druid.segment.DimensionSelector) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) QueryableIndexStorageAdapter(org.apache.druid.segment.QueryableIndexStorageAdapter) VirtualColumns(org.apache.druid.segment.VirtualColumns) GeneratorSchemaInfo(org.apache.druid.segment.generator.GeneratorSchemaInfo) DimensionsSpec(org.apache.druid.data.input.impl.DimensionsSpec) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test) Granularities(org.apache.druid.java.util.common.granularity.Granularities) Cursor(org.apache.druid.segment.Cursor) NullHandling(org.apache.druid.common.config.NullHandling) Assert(org.junit.Assert) CloseableUtils(org.apache.druid.utils.CloseableUtils) Collections(java.util.Collections) BaseSingleValueDimensionSelector(org.apache.druid.segment.BaseSingleValueDimensionSelector) DimensionSelector(org.apache.druid.segment.DimensionSelector) IncrementalIndex(org.apache.druid.segment.incremental.IncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) OnheapIncrementalIndex(org.apache.druid.segment.incremental.OnheapIncrementalIndex) Cursor(org.apache.druid.segment.Cursor) CountAggregatorFactory(org.apache.druid.query.aggregation.CountAggregatorFactory) TimestampSpec(org.apache.druid.data.input.impl.TimestampSpec) IncrementalIndexStorageAdapter(org.apache.druid.segment.incremental.IncrementalIndexStorageAdapter) List(java.util.List) ArrayList(java.util.ArrayList) 
ImmutableList(com.google.common.collect.ImmutableList) MapBasedInputRow(org.apache.druid.data.input.MapBasedInputRow) IncrementalIndexSchema(org.apache.druid.segment.incremental.IncrementalIndexSchema) InitializedNullHandlingTest(org.apache.druid.testing.InitializedNullHandlingTest) Test(org.junit.Test)

Aggregations

DimensionSelector (org.apache.druid.segment.DimensionSelector)66 Test (org.junit.Test)36 DefaultDimensionSpec (org.apache.druid.query.dimension.DefaultDimensionSpec)31 Cursor (org.apache.druid.segment.Cursor)24 InitializedNullHandlingTest (org.apache.druid.testing.InitializedNullHandlingTest)24 IndexedInts (org.apache.druid.segment.data.IndexedInts)22 List (java.util.List)14 VirtualColumns (org.apache.druid.segment.VirtualColumns)14 ImmutableList (com.google.common.collect.ImmutableList)12 StorageAdapter (org.apache.druid.segment.StorageAdapter)12 ArrayList (java.util.ArrayList)11 Intervals (org.apache.druid.java.util.common.Intervals)11 Granularities (org.apache.druid.java.util.common.granularity.Granularities)11 Sequence (org.apache.druid.java.util.common.guava.Sequence)11 QueryableIndexStorageAdapter (org.apache.druid.segment.QueryableIndexStorageAdapter)11 NullHandling (org.apache.druid.common.config.NullHandling)10 ColumnSelectorFactory (org.apache.druid.segment.ColumnSelectorFactory)9 DataSegment (org.apache.druid.timeline.DataSegment)9 ConstantDimensionSelector (org.apache.druid.segment.ConstantDimensionSelector)8 ValueMatcher (org.apache.druid.query.filter.ValueMatcher)7