Search in sources :

Example 1 with CollectorContext

use of io.crate.expression.reference.doc.lucene.CollectorContext in project crate by crate.

the class LuceneBatchIteratorBenchmark method createLuceneBatchIterator.

/**
 * Benchmark setup: fills an in-memory index with 10,000,000 documents, each
 * carrying a numeric doc-values field named "x" whose value is the document's
 * sequence number, then initialises the {@code indexSearcher},
 * {@code columnRefs} and {@code collectorContext} fields used by the benchmark.
 *
 * @throws Exception if writing to or opening the index fails
 */
@Setup
public void createLuceneBatchIterator() throws Exception {
    final String field = "x";
    final int numDocs = 10_000_000;

    IndexWriterConfig writerConfig = new IndexWriterConfig(new StandardAnalyzer());
    IndexWriter writer = new IndexWriter(new ByteBuffersDirectory(), writerConfig);

    int value = 0;
    while (value < numDocs) {
        Document document = new Document();
        document.add(new NumericDocValuesField(field, value));
        writer.addDocument(document);
        value++;
    }

    writer.commit();
    // Request a blocking force-merge with a segment limit of 1 before the reader is opened.
    writer.forceMerge(1, true);

    // The reader is opened directly from the writer, which stays open afterwards.
    indexSearcher = new IndexSearcher(DirectoryReader.open(writer));
    columnRefs = Collections.singletonList(new IntegerColumnReference(field));
    collectorContext = new CollectorContext();
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) IntegerColumnReference(io.crate.expression.reference.doc.lucene.IntegerColumnReference) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) Document(org.apache.lucene.document.Document) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Setup(org.openjdk.jmh.annotations.Setup)

Example 2 with CollectorContext

use of io.crate.expression.reference.doc.lucene.CollectorContext in project crate by crate.

the class LuceneShardCollectorProvider method getOrderedCollector.

/**
 * Builds a {@link LuceneOrderedDocCollector} for the given collect phase.
 *
 * <p>The phase is normalized first, a searcher is acquired from the shared
 * shard context and registered on the collect task, and the phase's WHERE
 * clause is converted into a Lucene query before the collector is assembled.
 *
 * @param phase              the collect phase to execute
 * @param sharedShardContext source of the searcher, reader id and index service
 * @param collectTask        supplies the transaction context and RAM accounting,
 *                           and receives the acquired searcher
 * @param requiresRepeat     not read by this implementation
 * @return the ordered collector for this shard
 */
@Override
public OrderedDocCollector getOrderedCollector(RoutedCollectPhase phase, SharedShardContext sharedShardContext, CollectTask collectTask, boolean requiresRepeat) {
    RoutedCollectPhase normalizedPhase = phase.normalize(shardNormalizer, collectTask.txnCtx());

    // Acquire the searcher and register it on the task under the reader id.
    var searcherRef = sharedShardContext.acquireSearcher("ordered-collector: " + formatSource(phase));
    collectTask.addSearcher(sharedShardContext.readerId(), searcherRef);

    IndexService indexService = sharedShardContext.indexService();
    QueryShardContext shardQueryContext = indexService.newQueryShardContext();
    final var luceneQuery = luceneQueryBuilder.convert(
        normalizedPhase.where(),
        collectTask.txnCtx(),
        indexService.mapperService(),
        indexShard.shardId().getIndexName(),
        shardQueryContext,
        table,
        indexService.cache());

    InputFactory.Context<? extends LuceneCollectorExpression<?>> inputCtx =
        docInputFactory.extractImplementations(collectTask.txnCtx(), normalizedPhase);
    CollectorContext collectorContext = new CollectorContext(sharedShardContext.readerId());
    int batchSize = normalizedPhase.shardQueueSize(localNodeId.get());

    if (LOGGER.isTraceEnabled()) {
        LOGGER.trace(
            "[{}][{}] creating LuceneOrderedDocCollector. Expected number of rows to be collected: {}",
            sharedShardContext.indexShard().routingEntry().currentNodeId(),
            sharedShardContext.indexShard().shardId(),
            batchSize);
    }

    OptimizeQueryForSearchAfter searchAfterOptimizer = new OptimizeQueryForSearchAfter(
        normalizedPhase.orderBy(),
        luceneQuery.queryShardContext(),
        fieldTypeLookup);

    return new LuceneOrderedDocCollector(
        indexShard.shardId(),
        searcherRef.item(),
        luceneQuery.query(),
        luceneQuery.minScore(),
        // whether the score system column is among the collected symbols
        Symbols.containsColumn(normalizedPhase.toCollect(), DocSysColumns.SCORE),
        batchSize,
        collectTask.getRamAccounting(),
        collectorContext,
        searchAfterOptimizer,
        LuceneSortGenerator.generateLuceneSort(
            collectTask.txnCtx(),
            collectorContext,
            normalizedPhase.orderBy(),
            docInputFactory,
            fieldTypeLookup),
        inputCtx.topLevelInputs(),
        inputCtx.expressions());
}
Also used : InputFactory(io.crate.expression.InputFactory) OptimizeQueryForSearchAfter(io.crate.execution.engine.collect.collectors.OptimizeQueryForSearchAfter) IndexService(org.elasticsearch.index.IndexService) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) LuceneOrderedDocCollector(io.crate.execution.engine.collect.collectors.LuceneOrderedDocCollector) RoutedCollectPhase(io.crate.execution.dsl.phases.RoutedCollectPhase)

Example 3 with CollectorContext

use of io.crate.expression.reference.doc.lucene.CollectorContext in project crate by crate.

the class SortSymbolVisitor method customSortField.

/**
 * Creates a {@link SortField} whose comparator evaluates {@code symbol}
 * through the doc input factory.
 *
 * <p>Each time a comparator is requested, all collector expressions are
 * started with the collector context captured from {@code context}, and a
 * null sentinel is looked up for the symbol's value type.
 *
 * @param name    name of the resulting sort field
 * @param symbol  symbol whose value is compared
 * @param context supplies the transaction context, collector context,
 *                null ordering and reverse flag
 * @return the custom sort field
 */
private SortField customSortField(String name, final Symbol symbol, final SortSymbolContext context) {
    InputFactory.Context<? extends LuceneCollectorExpression<?>> inputContext = docInputFactory.getCtx(context.txnCtx);
    final Input<?> input = inputContext.add(symbol);
    final List<? extends LuceneCollectorExpression<?>> expressions = inputContext.expressions();
    // Read eagerly so the comparator source uses the values present at creation time.
    final CollectorContext collectorContext = context.context;
    final boolean nullFirst = context.nullFirst;

    FieldComparatorSource comparatorSource = new FieldComparatorSource() {

        @Override
        public FieldComparator<?> newComparator(String fieldName, int numHits, int sortPos, boolean reversed) {
            for (LuceneCollectorExpression<?> expression : expressions) {
                expression.startCollect(collectorContext);
            }
            @SuppressWarnings("unchecked") DataType<Object> dataType = (DataType<Object>) symbol.valueType();
            Object nullSentinel = NullSentinelValues.nullSentinel(dataType, NullValueOrder.fromFlag(nullFirst), reversed);

            // for `null` sentinels we need to have a comparator that can deal with that
            Comparator<Object> valueComparator;
            if (nullSentinel != null) {
                valueComparator = dataType;
            } else if (nullFirst ^ reversed) {
                valueComparator = Comparator.nullsFirst(dataType);
            } else {
                valueComparator = Comparator.nullsLast(dataType);
            }
            return new InputFieldComparator(numHits, expressions, input, valueComparator, nullSentinel);
        }
    };
    return new SortField(name, comparatorSource, context.reverseFlag);
}
Also used : InputFactory(io.crate.expression.InputFactory) DocInputFactory(io.crate.execution.engine.collect.DocInputFactory) SortField(org.apache.lucene.search.SortField) SortedSetSortField(org.apache.lucene.search.SortedSetSortField) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) FieldComparatorSource(org.apache.lucene.search.FieldComparatorSource) FieldComparator(org.apache.lucene.search.FieldComparator) DataType(io.crate.types.DataType) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext)

Example 4 with CollectorContext

use of io.crate.expression.reference.doc.lucene.CollectorContext in project crate by crate.

the class OrderedLuceneBatchIteratorFactoryTest method createOrderedCollector.

/**
 * Test helper: builds a {@link LuceneOrderedDocCollector} over {@code searcher}
 * that matches all documents and sorts on {@code columnName} as a long,
 * using {@code reverseFlags[0]} as the reverse flag.
 *
 * @param searcher searcher the collector runs against
 * @param shardId  numeric id used for the dummy {@link ShardId}
 * @return the configured collector
 */
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, int shardId) {
    CollectorContext context = new CollectorContext();
    List<LuceneCollectorExpression<?>> orderByExpressions =
        Collections.singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o));

    ShardId dummyShard = new ShardId("dummy", UUIDs.randomBase64UUID(), shardId);
    Sort sort = new Sort(new SortedNumericSortField(columnName, SortField.Type.LONG, reverseFlags[0]));
    // batchSize < 10 to have at least one searchMore call.
    int batchSize = 5;

    return new LuceneOrderedDocCollector(
        dummyShard,
        searcher,
        new MatchAllDocsQuery(),
        null,
        false,
        batchSize,
        RamAccounting.NO_ACCOUNTING,
        context,
        f -> null,
        sort,
        // the same expression list is passed for both trailing arguments
        orderByExpressions,
        orderByExpressions);
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Arrays(java.util.Arrays) RowAccounting(io.crate.breaker.RowAccounting) LongType(io.crate.types.LongType) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) RowAccountingWithEstimators(io.crate.breaker.RowAccountingWithEstimators) BatchIterator(io.crate.data.BatchIterator) BatchIteratorTester(io.crate.testing.BatchIteratorTester) AtomicReference(java.util.concurrent.atomic.AtomicReference) Mockito.doThrow(org.mockito.Mockito.doThrow) Document(org.apache.lucene.document.Document) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) ESTestCase(org.elasticsearch.test.ESTestCase) SortField(org.apache.lucene.search.SortField) OrderingByPosition(io.crate.execution.engine.sort.OrderingByPosition) Before(org.junit.Before) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LongStream(java.util.stream.LongStream) Sort(org.apache.lucene.search.Sort) DirectoryReader(org.apache.lucene.index.DirectoryReader) Reference(io.crate.metadata.Reference) UUIDs(org.elasticsearch.common.UUIDs) Test(org.junit.Test) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RamAccounting(io.crate.breaker.RamAccounting) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) CountDownLatch(java.util.concurrent.CountDownLatch) IndexWriter(org.apache.lucene.index.IndexWriter) List(java.util.List) OrderBy(io.crate.analyze.OrderBy) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Row(io.crate.data.Row) CircuitBreakingException(org.elasticsearch.common.breaker.CircuitBreakingException) OrderByCollectorExpression(io.crate.expression.reference.doc.lucene.OrderByCollectorExpression) 
DataTypes(io.crate.types.DataTypes) Matchers.is(org.hamcrest.Matchers.is) TestingRowConsumer(io.crate.testing.TestingRowConsumer) Comparator(java.util.Comparator) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Collections(java.util.Collections) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) IndexSearcher(org.apache.lucene.search.IndexSearcher) Mockito.mock(org.mockito.Mockito.mock) ShardId(org.elasticsearch.index.shard.ShardId) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) OrderByCollectorExpression(io.crate.expression.reference.doc.lucene.OrderByCollectorExpression) Sort(org.apache.lucene.search.Sort) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression)

Example 5 with CollectorContext

use of io.crate.expression.reference.doc.lucene.CollectorContext in project crate by crate.

the class DocValuesGroupByOptimizedIteratorTest method test_group_by_doc_values_optimized_iterator_for_many_keys.

/**
 * Runs the many-keys variant of the doc-values group-by iterator with a
 * long sum aggregator on column "z" and the key columns "x" (string) and
 * "y" (long), and expects the rows ("0", 0, 6) and ("1", 1, 4) in any order.
 */
@Test
public void test_group_by_doc_values_optimized_iterator_for_many_keys() throws Exception {
    // Aggregation: sum over the long column "z".
    Signature sumSignature = Signature.aggregate(
        SumAggregation.NAME,
        DataTypes.LONG.getTypeSignature(),
        DataTypes.LONG.getTypeSignature());
    SumAggregation<?> sum = (SumAggregation<?>) functions.getQualified(
        sumSignature, List.of(DataTypes.LONG), DataTypes.LONG);
    Reference zRef = new Reference(
        new ReferenceIdent(RelationName.fromIndexName("test"), "z"),
        RowGranularity.DOC, DataTypes.LONG, ColumnPolicy.DYNAMIC, IndexType.PLAIN, true, true, 0, null);
    var sumAggregator = sum.getDocValueAggregator(List.of(zRef), mock(DocTableInfo.class), List.of());

    // Grouping keys: string column "x" and long column "y".
    BytesRefColumnReference xExpression = new BytesRefColumnReference("x");
    LongColumnReference yExpression = new LongColumnReference("y");
    var keyExpressions = List.of(xExpression, yExpression);
    Reference xRef = new Reference(
        new ReferenceIdent(RelationName.fromIndexName("test"), "x"),
        RowGranularity.DOC, DataTypes.STRING, ColumnPolicy.DYNAMIC, IndexType.PLAIN, true, true, 1, null);
    Reference yRef = new Reference(
        new ReferenceIdent(RelationName.fromIndexName("test"), "y"),
        RowGranularity.DOC, DataTypes.LONG, ColumnPolicy.DYNAMIC, IndexType.PLAIN, true, true, 2, null);
    var keyRefs = List.of(xRef, yRef);

    var iterator = DocValuesGroupByOptimizedIterator.GroupByIterator.forManyKeys(
        List.of(sumAggregator),
        indexSearcher,
        keyRefs,
        keyExpressions,
        RamAccounting.NO_ACCOUNTING,
        null,
        null,
        new MatchAllDocsQuery(),
        new CollectorContext());

    var consumer = new TestingRowConsumer();
    consumer.accept(iterator, null);

    assertThat(
        consumer.getResult(),
        containsInAnyOrder(new Object[] { "0", 0L, 6L }, new Object[] { "1", 1L, 4L }));
}
Also used : DocTableInfo(io.crate.metadata.doc.DocTableInfo) BytesRefColumnReference(io.crate.expression.reference.doc.lucene.BytesRefColumnReference) BytesRefColumnReference(io.crate.expression.reference.doc.lucene.BytesRefColumnReference) AtomicReference(java.util.concurrent.atomic.AtomicReference) LongColumnReference(io.crate.expression.reference.doc.lucene.LongColumnReference) Reference(io.crate.metadata.Reference) SumAggregation(io.crate.execution.engine.aggregation.impl.SumAggregation) LongColumnReference(io.crate.expression.reference.doc.lucene.LongColumnReference) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ReferenceIdent(io.crate.metadata.ReferenceIdent) TestingRowConsumer(io.crate.testing.TestingRowConsumer) CrateDummyClusterServiceUnitTest(io.crate.test.integration.CrateDummyClusterServiceUnitTest) Test(org.junit.Test)

Aggregations

CollectorContext (io.crate.expression.reference.doc.lucene.CollectorContext): 13; Reference (io.crate.metadata.Reference): 6; InputFactory (io.crate.expression.InputFactory): 5; AtomicReference (java.util.concurrent.atomic.AtomicReference): 5; MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 5; ShardId (org.elasticsearch.index.shard.ShardId): 4; SharedShardContext (io.crate.execution.jobs.SharedShardContext): 3; LuceneQueryBuilder (io.crate.lucene.LuceneQueryBuilder): 3; ReferenceIdent (io.crate.metadata.ReferenceIdent): 3; DocTableInfo (io.crate.metadata.doc.DocTableInfo): 3; CrateDummyClusterServiceUnitTest (io.crate.test.integration.CrateDummyClusterServiceUnitTest): 3; TestingRowConsumer (io.crate.testing.TestingRowConsumer): 3; StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer): 3; Document (org.apache.lucene.document.Document): 3; NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField): 3; IndexWriter (org.apache.lucene.index.IndexWriter): 3; QueryShardContext (org.elasticsearch.index.query.QueryShardContext): 3; Test (org.junit.Test): 3; OrderBy (io.crate.analyze.OrderBy): 2; RamAccounting (io.crate.breaker.RamAccounting): 2