Search in sources :

Example 1 with AggregationContext

use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.

the class GroupByOptimizedIterator method applyAggregatesGroupedByKey.

/**
 * Runs {@code query} against every index segment and computes the aggregation states
 * grouped by the value of the doc-values column {@code keyColumnName}.
 *
 * <p>Within one segment the states are collected in an array indexed by the segment's
 * doc-values ordinal. Once the segment has been fully iterated, the per-ordinal states are
 * merged into a map keyed by the actual term bytes, reducing them with states that an
 * earlier segment already produced for the same key. Documents that have no value for the
 * key column are aggregated into a single state array that is stored under the {@code null} key.
 *
 * @param bigArrays       allocator for the per-segment ordinal-to-states array
 * @param indexSearcher   searcher used to rewrite the query, create the weight and list the segments
 * @param keyColumnName   name of the sorted-set doc-values field that is used as group key
 * @param aggregations    aggregations to evaluate; the state arrays have one slot per entry
 * @param expressions     expressions that are positioned on each segment and each matching document
 * @param aggExpressions  expressions that receive {@code inputRow} for every matching document
 * @param ramAccounting   accounting for the memory used by states and map entries
 * @param memoryManager   passed on to the state creation / aggregation helpers
 * @param minNodeVersion  passed on to {@code initStates}
 * @param inputRow        row handed to {@code aggExpressions} for each document
 * @param query           query selecting the documents to aggregate
 * @param killed          checked through {@code raiseIfClosedOrKilled} per segment, per document
 *                        and per ordinal (presumably aborts the operation when set; helper not
 *                        visible here)
 * @return map from group key to aggregation states; contains a {@code null} key if at least
 *         one matching document had no value for the key column
 * @throws IOException if the underlying index access fails
 * @throws GroupByOnArrayUnsupportedException if a document has more than one value for the key column
 */
private static Map<BytesRef, Object[]> applyAggregatesGroupedByKey(BigArrays bigArrays, IndexSearcher indexSearcher, String keyColumnName, List<AggregationContext> aggregations, List<? extends LuceneCollectorExpression<?>> expressions, List<CollectExpression<Row, ?>> aggExpressions, RamAccounting ramAccounting, MemoryManager memoryManager, Version minNodeVersion, InputRow inputRow, Query query, AtomicReference<Throwable> killed) throws IOException {
    final HashMap<BytesRef, Object[]> statesByKey = new HashMap<>();
    // Scores are not requested (COMPLETE_NO_SCORES); only the set of matching documents is used.
    final Weight weight = indexSearcher.createWeight(indexSearcher.rewrite(query), ScoreMode.COMPLETE_NO_SCORES, 1f);
    final List<LeafReaderContext> leaves = indexSearcher.getTopReaderContext().leaves();
    // States for documents without a key value; shared across all segments.
    Object[] nullStates = null;
    for (LeafReaderContext leaf : leaves) {
        raiseIfClosedOrKilled(killed);
        Scorer scorer = weight.scorer(leaf);
        if (scorer == null) {
            // No scorer for this segment, so there is nothing to iterate here.
            continue;
        }
        var readerContext = new ReaderContext(leaf);
        for (int i = 0, expressionsSize = expressions.size(); i < expressionsSize; i++) {
            expressions.get(i).setNextReader(readerContext);
        }
        SortedSetDocValues values = DocValues.getSortedSet(leaf.reader(), keyColumnName);
        // One slot per ordinal of this segment; released when the try block is left.
        try (ObjectArray<Object[]> statesByOrd = bigArrays.newObjectArray(values.getValueCount())) {
            DocIdSetIterator docs = scorer.iterator();
            Bits liveDocs = leaf.reader().getLiveDocs();
            for (int doc = docs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = docs.nextDoc()) {
                raiseIfClosedOrKilled(killed);
                if (docDeleted(liveDocs, doc)) {
                    continue;
                }
                // Position all expressions on the current document before the aggregations read them.
                for (int i = 0, expressionsSize = expressions.size(); i < expressionsSize; i++) {
                    expressions.get(i).setNextDocId(doc);
                }
                for (int i = 0, expressionsSize = aggExpressions.size(); i < expressionsSize; i++) {
                    aggExpressions.get(i).setNextRow(inputRow);
                }
                if (values.advanceExact(doc)) {
                    // The document has at least one value; its first ordinal identifies the group.
                    long ord = values.nextOrd();
                    Object[] states = statesByOrd.get(ord);
                    if (states == null) {
                        // First document seen for this ordinal in this segment.
                        statesByOrd.set(ord, initStates(aggregations, ramAccounting, memoryManager, minNodeVersion));
                    } else {
                        aggregateValues(aggregations, ramAccounting, memoryManager, states);
                    }
                    // A second ordinal means the document holds several values (an array),
                    // which cannot be used as a single group key.
                    if (values.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                        throw new GroupByOnArrayUnsupportedException(keyColumnName);
                    }
                } else {
                    // No value for the key column: aggregate into the null group.
                    if (nullStates == null) {
                        nullStates = initStates(aggregations, ramAccounting, memoryManager, minNodeVersion);
                    } else {
                        aggregateValues(aggregations, ramAccounting, memoryManager, nullStates);
                    }
                }
            }
            // Merge this segment's per-ordinal states into the result map keyed by term bytes.
            for (long ord = 0; ord < statesByOrd.size(); ord++) {
                raiseIfClosedOrKilled(killed);
                Object[] states = statesByOrd.get(ord);
                if (states == null) {
                    // No matching document used this ordinal.
                    continue;
                }
                BytesRef sharedKey = values.lookupOrd(ord);
                Object[] prevStates = statesByKey.get(sharedKey);
                if (prevStates == null) {
                    ramAccounting.addBytes(StringSizeEstimator.estimateSize(sharedKey) + HASH_MAP_ENTRY_OVERHEAD);
                    // Store a private copy of the key; NOTE(review): Lucene documents that the
                    // BytesRef returned by lookupOrd may be reused between calls - confirm.
                    statesByKey.put(BytesRef.deepCopyOf(sharedKey), states);
                } else {
                    // The key was already produced by an earlier segment: combine the states slot by slot.
                    for (int i = 0; i < aggregations.size(); i++) {
                        AggregationContext aggregation = aggregations.get(i);
                        // noinspection unchecked
                        prevStates[i] = aggregation.function().reduce(ramAccounting, prevStates[i], states[i]);
                    }
                }
            }
        }
    }
    if (nullStates != null) {
        // HashMap permits a null key; it represents the group of documents without a key value.
        statesByKey.put(null, nullStates);
    }
    return statesByKey;
}
Also used : AggregationContext(io.crate.execution.engine.aggregation.AggregationContext) HashMap(java.util.HashMap) Scorer(org.apache.lucene.search.Scorer) Weight(org.apache.lucene.search.Weight) SortedSetDocValues(org.apache.lucene.index.SortedSetDocValues) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) ReaderContext(io.crate.execution.engine.fetch.ReaderContext) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) Bits(org.apache.lucene.util.Bits) GroupByOnArrayUnsupportedException(io.crate.exceptions.GroupByOnArrayUnsupportedException) DocIdSetIterator(org.apache.lucene.search.DocIdSetIterator) BytesRef(org.apache.lucene.util.BytesRef)

Example 2 with AggregationContext

use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.

the class InputFactoryTest method testProcessGroupByProjectionSymbolsAggregation.

/**
 * Adds group keys and one aggregation to an aggregation context and checks the resulting
 * aggregations, collect expressions and top-level inputs, including the values they yield
 * for a sample row.
 */
@Test
public void testProcessGroupByProjectionSymbolsAggregation() throws Exception {
    // Scenario: select count(x), x, y * 2 ... group by x, y * 2
    // group keys: [ in(0), in(1) + 10 ]
    Symbol firstKey = new InputColumn(0, DataTypes.LONG);
    List<Symbol> groupKeys = Arrays.asList(firstKey, add);
    Function countFunction = (Function) expressions.asSymbol("count(x)");
    // aggregated values: [ count(in(0)) ]
    Aggregation countAggregation = new Aggregation(countFunction.signature(), countFunction.valueType(), List.of(new InputColumn(0)));
    List<Aggregation> aggregatedValues = List.of(countAggregation);

    InputFactory.Context<CollectExpression<Row, ?>> context = factory.ctxForAggregations(txnCtx);
    context.add(groupKeys);
    // inputs after adding the keys: [ x, add ]
    List<Input<?>> inputsForKeys = context.topLevelInputs();
    context.add(aggregatedValues);

    List<AggregationContext> aggregationContexts = context.aggregations();
    assertThat(aggregationContexts.size(), is(1));

    // collect expressions: [ in0, in1 ]
    List<CollectExpression<Row, ?>> collectExpressions = new ArrayList<>(context.expressions());
    assertThat(collectExpressions.size(), is(2));

    // still 2: the count aggregation does not show up as a top-level input
    List<Input<?>> inputsAfterAggregation = context.topLevelInputs();
    assertThat(inputsAfterAggregation.size(), is(2));

    RowN sampleRow = new RowN(1L, 2L);
    collectExpressions.forEach(collectExpression -> collectExpression.setNextRow(sampleRow));

    assertThat(collectExpressions.get(0).value(), is(1L));
    // second expression exposes the raw input value
    assertThat(collectExpressions.get(1).value(), is(2L));

    assertThat(inputsForKeys.size(), is(2));
    assertThat(inputsForKeys.get(0).value(), is(1L));
    // 2 + 10
    assertThat(inputsForKeys.get(1).value(), is(12));
}
Also used : AggregationContext(io.crate.execution.engine.aggregation.AggregationContext) Symbol(io.crate.expression.symbol.Symbol) ArrayList(java.util.ArrayList) CollectExpression(io.crate.execution.engine.collect.CollectExpression) Aggregation(io.crate.expression.symbol.Aggregation) Function(io.crate.expression.symbol.Function) Input(io.crate.data.Input) RowN(io.crate.data.RowN) InputColumn(io.crate.expression.symbol.InputColumn) Row(io.crate.data.Row) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 3 with AggregationContext

use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.

the class InputFactoryTest method testAggregationSymbolsInputReuse.

/**
 * Checks that two aggregations reading the same input column are handed the very same
 * {@code Input} instance by the aggregation context.
 */
@Test
public void testAggregationSymbolsInputReuse() throws Exception {
    Function count = (Function) expressions.asSymbol("count(x)");
    Function average = (Function) expressions.asSymbol("avg(x)");

    // Both aggregations read from input column 0.
    Aggregation countAggregation = new Aggregation(count.signature(), count.signature().getReturnType().createType(), List.of(new InputColumn(0)));
    Aggregation averageAggregation = new Aggregation(average.signature(), average.signature().getReturnType().createType(), List.of(new InputColumn(0)));
    List<Symbol> aggregations = Arrays.asList(countAggregation, averageAggregation);

    InputFactory.Context<CollectExpression<Row, ?>> context = factory.ctxForAggregations(txnCtx);
    context.add(aggregations);

    List<AggregationContext> contexts = context.aggregations();
    Input<?> firstInputOfCount = contexts.get(0).inputs()[0];
    Input<?> firstInputOfAverage = contexts.get(1).inputs()[0];
    assertSame(firstInputOfCount, firstInputOfAverage);
}
Also used : Aggregation(io.crate.expression.symbol.Aggregation) AggregationContext(io.crate.execution.engine.aggregation.AggregationContext) Function(io.crate.expression.symbol.Function) Symbol(io.crate.expression.symbol.Symbol) InputColumn(io.crate.expression.symbol.InputColumn) CollectExpression(io.crate.execution.engine.collect.CollectExpression) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Example 4 with AggregationContext

use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.

the class GroupByOptimizedIteratorTest method prepare.

/**
 * Builds the fixture shared by the tests: an index with 20 documents that each carry a
 * distinct value in the doc-values column "x", the expected {@code [value, 1L]} result rows,
 * a searcher over that index, the input expression and a single count(*) aggregation context.
 */
@Before
public void prepare() throws Exception {
    NodeContext nodeCtx = createNodeContext();
    ByteBuffersDirectory directory = new ByteBuffersDirectory();
    IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    IndexWriter writer = new IndexWriter(directory, writerConfig);
    columnName = "x";

    final int documentCount = 20;
    expectedResult = new ArrayList<>(documentCount);
    for (long n = 0; n < documentCount; n++) {
        String value = "val_" + n;
        Document document = new Document();
        document.add(new SortedSetDocValuesField(columnName, new BytesRef(value)));
        writer.addDocument(document);
        // every value occurs exactly once, so the expected count per key is 1
        expectedResult.add(new Object[] { value, 1L });
    }
    writer.commit();

    indexSearcher = new IndexSearcher(DirectoryReader.open(writer));
    inExpr = new InputCollectExpression(0);

    var countStar = CountAggregation.COUNT_STAR_SIGNATURE;
    CountAggregation countAggregation = (CountAggregation) nodeCtx.functions().getQualified(countStar, Collections.emptyList(), countStar.getReturnType().createType());
    // filter always evaluates to true; count(*) takes no inputs
    aggregationContexts = List.of(new AggregationContext(countAggregation, () -> true, List.of()));
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) AggregationContext(io.crate.execution.engine.aggregation.AggregationContext) TestingHelpers.createNodeContext(io.crate.testing.TestingHelpers.createNodeContext) NodeContext(io.crate.metadata.NodeContext) Document(org.apache.lucene.document.Document) IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) CountAggregation(io.crate.execution.engine.aggregation.impl.CountAggregation) SortedSetDocValuesField(org.apache.lucene.document.SortedSetDocValuesField) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Before(org.junit.Before)

Example 5 with AggregationContext

use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.

the class GroupByOptimizedIterator method initStates.

/**
 * Creates the initial state array for a new group, one slot per aggregation.
 *
 * <p>For every aggregation a new state is created first. If the aggregation's filter matches,
 * the slot holds the result of iterating that state over the aggregation's current inputs;
 * otherwise the slot holds the untouched new state.
 *
 * @param aggregations   aggregations to create states for
 * @param ramAccounting  handed to the aggregation functions
 * @param memoryManager  handed to the aggregation functions
 * @param minNodeVersion handed to {@code newState} together with {@code Version.CURRENT}
 * @return array with as many elements as {@code aggregations}
 */
private static Object[] initStates(List<AggregationContext> aggregations, RamAccounting ramAccounting, MemoryManager memoryManager, Version minNodeVersion) {
    final int aggregationCount = aggregations.size();
    final Object[] initialStates = new Object[aggregationCount];
    for (int slot = 0; slot < aggregationCount; slot++) {
        AggregationContext context = aggregations.get(slot);
        AggregationFunction aggregationFunction = context.function();
        // The state is always created before the filter is evaluated.
        var freshState = aggregationFunction.newState(ramAccounting, Version.CURRENT, minNodeVersion, memoryManager);
        // noinspection unchecked
        initialStates[slot] = InputCondition.matches(context.filter())
            ? aggregationFunction.iterate(ramAccounting, memoryManager, freshState, context.inputs())
            : freshState;
    }
    return initialStates;
}
Also used : AggregationContext(io.crate.execution.engine.aggregation.AggregationContext) AggregationFunction(io.crate.execution.engine.aggregation.AggregationFunction)

Aggregations

AggregationContext (io.crate.execution.engine.aggregation.AggregationContext): 6; CollectExpression (io.crate.execution.engine.collect.CollectExpression): 2; Aggregation (io.crate.expression.symbol.Aggregation): 2; Function (io.crate.expression.symbol.Function): 2; InputColumn (io.crate.expression.symbol.InputColumn): 2; Symbol (io.crate.expression.symbol.Symbol): 2; CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest): 2; BytesRef (org.apache.lucene.util.BytesRef): 2; Test (org.junit.Test): 2; RamAccounting (io.crate.breaker.RamAccounting): 1; Input (io.crate.data.Input): 1; Row (io.crate.data.Row): 1; RowN (io.crate.data.RowN): 1; GroupByOnArrayUnsupportedException (io.crate.exceptions.GroupByOnArrayUnsupportedException): 1; GroupProjection (io.crate.execution.dsl.projection.GroupProjection): 1; AggregationFunction (io.crate.execution.engine.aggregation.AggregationFunction): 1; CountAggregation (io.crate.execution.engine.aggregation.impl.CountAggregation): 1; ReaderContext (io.crate.execution.engine.fetch.ReaderContext): 1; SharedShardContext (io.crate.execution.jobs.SharedShardContext): 1; InputFactory (io.crate.expression.InputFactory): 1