Use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.
Class GroupByOptimizedIterator, method applyAggregatesGroupedByKey.
/**
 * Runs {@code query} against every leaf of {@code indexSearcher} and folds the matching documents
 * into aggregation states grouped by the value of {@code keyColumnName}.
 *
 * <p>For each leaf the states are first collected in an array indexed by the ordinal obtained from
 * the key column's {@link SortedSetDocValues}. Once the leaf is exhausted, the non-empty slots are
 * merged into the result map under the key bytes looked up for that ordinal; if the key is already
 * present (from an earlier leaf) the states are combined via the aggregation function's
 * {@code reduce}. Documents that have no value for the key column are aggregated into a single
 * state array that is stored under the {@code null} key.
 *
 * @param bigArrays       allocator for the per-leaf, ordinal-indexed state array
 * @param indexSearcher   searcher providing the leaves and the query weight
 * @param keyColumnName   name of the doc-values field used as the grouping key
 * @param aggregations    aggregations whose states are created, iterated and reduced
 * @param expressions     expressions that are positioned on each leaf and each matching doc
 * @param aggExpressions  expressions that receive {@code inputRow} for each matching doc
 * @param ramAccounting   receives the estimated size of every key newly added to the result
 *                        and is handed to the aggregation functions
 * @param memoryManager   passed through to state creation and iteration
 * @param minNodeVersion  passed through to state creation
 * @param inputRow        row handed to {@code aggExpressions} for each matching doc
 * @param query           query selecting the documents to aggregate
 * @param killed          handed to {@code raiseIfClosedOrKilled} before each leaf, each doc
 *                        and each ordinal that is merged
 * @return map from key bytes to aggregation states; contains a {@code null} key when at least
 *         one matching document had no value for the key column
 * @throws IOException                        propagated from the Lucene calls
 * @throws GroupByOnArrayUnsupportedException if a document carries more than one value for the
 *                                            key column
 */
private static Map<BytesRef, Object[]> applyAggregatesGroupedByKey(BigArrays bigArrays,
                                                                   IndexSearcher indexSearcher,
                                                                   String keyColumnName,
                                                                   List<AggregationContext> aggregations,
                                                                   List<? extends LuceneCollectorExpression<?>> expressions,
                                                                   List<CollectExpression<Row, ?>> aggExpressions,
                                                                   RamAccounting ramAccounting,
                                                                   MemoryManager memoryManager,
                                                                   Version minNodeVersion,
                                                                   InputRow inputRow,
                                                                   Query query,
                                                                   AtomicReference<Throwable> killed) throws IOException {
    final HashMap<BytesRef, Object[]> result = new HashMap<>();
    final Query rewrittenQuery = indexSearcher.rewrite(query);
    final Weight weight = indexSearcher.createWeight(rewrittenQuery, ScoreMode.COMPLETE_NO_SCORES, 1f);
    final List<LeafReaderContext> leafContexts = indexSearcher.getTopReaderContext().leaves();
    // States for documents that have no value in the key column; shared across all leaves.
    Object[] statesForMissingKey = null;

    for (LeafReaderContext leafContext : leafContexts) {
        raiseIfClosedOrKilled(killed);
        final Scorer scorer = weight.scorer(leafContext);
        if (scorer == null) {
            // The weight yields no scorer for this leaf, so there is nothing to iterate.
            continue;
        }

        final ReaderContext readerContext = new ReaderContext(leafContext);
        for (LuceneCollectorExpression<?> expression : expressions) {
            expression.setNextReader(readerContext);
        }

        final SortedSetDocValues keyValues = DocValues.getSortedSet(leafContext.reader(), keyColumnName);
        try (ObjectArray<Object[]> statesByOrd = bigArrays.newObjectArray(keyValues.getValueCount())) {
            final DocIdSetIterator matchingDocs = scorer.iterator();
            final Bits liveDocs = leafContext.reader().getLiveDocs();

            // Phase 1: aggregate every matching, non-deleted doc into the slot of its key ordinal.
            for (int doc = matchingDocs.nextDoc(); doc != DocIdSetIterator.NO_MORE_DOCS; doc = matchingDocs.nextDoc()) {
                raiseIfClosedOrKilled(killed);
                if (docDeleted(liveDocs, doc)) {
                    continue;
                }
                for (int idx = 0, count = expressions.size(); idx < count; idx++) {
                    expressions.get(idx).setNextDocId(doc);
                }
                for (int idx = 0, count = aggExpressions.size(); idx < count; idx++) {
                    aggExpressions.get(idx).setNextRow(inputRow);
                }

                if (!keyValues.advanceExact(doc)) {
                    // No key value for this doc: it belongs to the null group.
                    if (statesForMissingKey != null) {
                        aggregateValues(aggregations, ramAccounting, memoryManager, statesForMissingKey);
                    } else {
                        statesForMissingKey = initStates(aggregations, ramAccounting, memoryManager, minNodeVersion);
                    }
                    continue;
                }

                final long keyOrd = keyValues.nextOrd();
                final Object[] existingStates = statesByOrd.get(keyOrd);
                if (existingStates != null) {
                    aggregateValues(aggregations, ramAccounting, memoryManager, existingStates);
                } else {
                    statesByOrd.set(keyOrd, initStates(aggregations, ramAccounting, memoryManager, minNodeVersion));
                }
                // A second ordinal means the doc holds several values for the key column.
                if (keyValues.nextOrd() != SortedSetDocValues.NO_MORE_ORDS) {
                    throw new GroupByOnArrayUnsupportedException(keyColumnName);
                }
            }

            // Phase 2: merge this leaf's states into the result, keyed by the actual key bytes.
            for (long ord = 0; ord < statesByOrd.size(); ord++) {
                raiseIfClosedOrKilled(killed);
                final Object[] leafStates = statesByOrd.get(ord);
                if (leafStates == null) {
                    continue;
                }
                final BytesRef sharedKey = keyValues.lookupOrd(ord);
                final Object[] mergedStates = result.get(sharedKey);
                if (mergedStates != null) {
                    for (int i = 0; i < aggregations.size(); i++) {
                        AggregationContext aggregation = aggregations.get(i);
                        // noinspection unchecked
                        mergedStates[i] = aggregation.function().reduce(ramAccounting, mergedStates[i], leafStates[i]);
                    }
                    continue;
                }
                ramAccounting.addBytes(StringSizeEstimator.estimateSize(sharedKey) + HASH_MAP_ENTRY_OVERHEAD);
                // Store a private copy; presumably the looked-up BytesRef may be reused by the
                // doc values instance (see Lucene's lookupOrd documentation).
                result.put(BytesRef.deepCopyOf(sharedKey), leafStates);
            }
        }
    }

    if (statesForMissingKey != null) {
        result.put(null, statesForMissingKey);
    }
    return result;
}
Use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.
Class InputFactoryTest, method testProcessGroupByProjectionSymbolsAggregation.
/**
 * Builds an aggregation context from two group keys plus one {@code count(x)} aggregation and
 * checks the number of aggregations, collect expressions and top-level inputs it exposes, as well
 * as the values they produce for the row {@code (1L, 2L)}.
 */
@Test
public void testProcessGroupByProjectionSymbolsAggregation() throws Exception {
    // Group keys: input column 0 and the `add` symbol. `add` is defined outside this method;
    // judging by the expected value asserted at the end it evaluates to in(1) + 10.
    InputColumn firstColumn = new InputColumn(0, DataTypes.LONG);
    List<Symbol> groupKeys = Arrays.asList(firstColumn, add);

    // Aggregated values: [ count(in(0)) ]
    Function countX = (Function) expressions.asSymbol("count(x)");
    Aggregation countAggregation =
        new Aggregation(countX.signature(), countX.valueType(), List.of(new InputColumn(0)));
    List<Aggregation> aggregationSymbols = List.of(countAggregation);

    InputFactory.Context<CollectExpression<Row, ?>> ctx = factory.ctxForAggregations(txnCtx);
    ctx.add(groupKeys);
    // Captured before the aggregation is added: the inputs for the two keys.
    List<Input<?>> keyInputs = ctx.topLevelInputs();
    ctx.add(aggregationSymbols);

    List<AggregationContext> aggregations = ctx.aggregations();
    assertThat(aggregations.size(), is(1));

    // One collect expression per referenced input column: [ in0, in1 ]
    List<CollectExpression<Row, ?>> collectExpressions = new ArrayList<>(ctx.expressions());
    assertThat(collectExpressions.size(), is(2));

    // Still 2 after adding the aggregation: count does not show up as a top-level input.
    List<Input<?>> allInputs = ctx.topLevelInputs();
    assertThat(allInputs.size(), is(2));

    RowN row = new RowN(1L, 2L);
    collectExpressions.forEach(expression -> expression.setNextRow(row));

    // The collect expressions expose the raw column values of the row.
    assertThat(collectExpressions.get(0).value(), is(1L));
    assertThat(collectExpressions.get(1).value(), is(2L));

    assertThat(keyInputs.size(), is(2));
    assertThat(keyInputs.get(0).value(), is(1L));
    // 2 + 10
    assertThat(keyInputs.get(1).value(), is(12));
}
Use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.
Class InputFactoryTest, method testAggregationSymbolsInputReuse.
/**
 * Adds a {@code count} and an {@code avg} aggregation that both read input column 0 and checks
 * that the resulting aggregation contexts share the very same {@code Input} instance.
 */
@Test
public void testAggregationSymbolsInputReuse() throws Exception {
    Function countX = (Function) expressions.asSymbol("count(x)");
    Function avgX = (Function) expressions.asSymbol("avg(x)");

    // Both aggregations are fed from input column 0.
    Aggregation countAggregation = new Aggregation(
        countX.signature(),
        countX.signature().getReturnType().createType(),
        List.of(new InputColumn(0)));
    Aggregation avgAggregation = new Aggregation(
        avgX.signature(),
        avgX.signature().getReturnType().createType(),
        List.of(new InputColumn(0)));
    List<Symbol> aggregations = Arrays.asList(countAggregation, avgAggregation);

    InputFactory.Context<CollectExpression<Row, ?>> ctx = factory.ctxForAggregations(txnCtx);
    ctx.add(aggregations);

    List<AggregationContext> contexts = ctx.aggregations();
    Input<?> countInput = contexts.get(0).inputs()[0];
    Input<?> avgInput = contexts.get(1).inputs()[0];

    // Identity, not equality: the input must be reused rather than created twice.
    assertSame(countInput, avgInput);
}
Use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.
Class GroupByOptimizedIteratorTest, method prepare.
/**
 * Test fixture: writes 20 documents with the distinct values {@code val_0 .. val_19} in the
 * sorted-set doc-values field {@code "x"} into an in-memory index, opens a searcher on it and
 * prepares the expected result (each value paired with {@code 1L}) together with a single
 * count-star aggregation context.
 */
@Before
public void prepare() throws Exception {
    NodeContext nodeCtx = createNodeContext();

    var directory = new ByteBuffersDirectory();
    var writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    IndexWriter writer = new IndexWriter(directory, writerConfig);

    final int docCount = 20;
    columnName = "x";
    expectedResult = new ArrayList<>(docCount);
    for (long i = 0; i < docCount; i++) {
        String val = "val_" + i;
        Document doc = new Document();
        doc.add(new SortedSetDocValuesField(columnName, new BytesRef(val)));
        writer.addDocument(doc);
        // Every value occurs in exactly one document.
        expectedResult.add(new Object[] { val, 1L });
    }
    writer.commit();

    // The reader is opened from the writer itself rather than from the directory.
    indexSearcher = new IndexSearcher(DirectoryReader.open(writer));
    inExpr = new InputCollectExpression(0);

    var countStarSignature = CountAggregation.COUNT_STAR_SIGNATURE;
    CountAggregation countAggregation = (CountAggregation) nodeCtx.functions().getQualified(
        countStarSignature,
        Collections.emptyList(),
        countStarSignature.getReturnType().createType());
    // Filter input is constantly true and the aggregation has no inputs.
    aggregationContexts = List.of(new AggregationContext(countAggregation, () -> true, List.of()));
}
Use of io.crate.execution.engine.aggregation.AggregationContext in project crate by crate.
Class GroupByOptimizedIterator, method initStates.
/**
 * Creates the initial state array for a new group: one state per aggregation, in the order of
 * {@code aggregations}.
 *
 * <p>Each state is created via the function's {@code newState}. If the aggregation's filter
 * matches according to {@code InputCondition.matches}, the state is immediately advanced once
 * with the aggregation's current inputs via {@code iterate}; otherwise the fresh state is stored
 * unchanged.
 *
 * @param aggregations   aggregations to create states for
 * @param ramAccounting  passed to {@code newState} and {@code iterate}
 * @param memoryManager  passed to {@code newState} and {@code iterate}
 * @param minNodeVersion passed to {@code newState}
 * @return array holding one state per aggregation
 */
private static Object[] initStates(List<AggregationContext> aggregations,
                                   RamAccounting ramAccounting,
                                   MemoryManager memoryManager,
                                   Version minNodeVersion) {
    final int aggregationCount = aggregations.size();
    final Object[] states = new Object[aggregationCount];
    for (int slot = 0; slot < aggregationCount; slot++) {
        final AggregationContext context = aggregations.get(slot);
        final AggregationFunction function = context.function();
        // NOTE(review): Version.CURRENT is passed as the second argument; presumably the
        // version the state is created for. Confirm against AggregationFunction#newState.
        var freshState = function.newState(ramAccounting, Version.CURRENT, minNodeVersion, memoryManager);
        if (!InputCondition.matches(context.filter())) {
            // Filtered out: keep the untouched initial state.
            states[slot] = freshState;
            continue;
        }
        // noinspection unchecked
        states[slot] = function.iterate(ramAccounting, memoryManager, freshState, context.inputs());
    }
    return states;
}
Aggregations