Search in sources :

Example 1 with LuceneCollectorExpression

use of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in project crate by crate.

the class LuceneOrderedDocCollectorTest method testSearchWithScores.

/**
 * Checks that a {@link ScoreCollectorExpression} yields the score of each collected row when
 * the collector is built with the trailing {@code true} flag (presumably "track scores" —
 * confirm against the {@code collector(...)} helper).
 *
 * <p>Four documents with the keyword value "Arthur" are indexed and searched with a
 * {@link ConstantScoreQuery}; the first {@code collect()} call is expected to return two rows,
 * each with a score of {@code 1.0}.
 *
 * <p>The directory, writer and reader are opened in try-with-resources so they are released
 * even when an assertion fails.
 */
@Test
public void testSearchWithScores() throws Exception {
    try (ByteBuffersDirectory directory = new ByteBuffersDirectory();
         IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer()))) {
        FieldType fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;
        for (int i = 0; i < 3; i++) {
            addDoc(w, "x", fieldType, "Arthur");
        }
        // NOTE(review): the previous comment claimed this value is not "Arthur" (to lower its
        // score), but the value indexed here is exactly "Arthur" — confirm the intent. The
        // assertions below hold either way as long as at least two documents match.
        addDoc(w, "x", fieldType, "Arthur");
        w.commit();

        // Reader opened from the writer; it is closed before the writer when the block exits.
        try (DirectoryReader reader = DirectoryReader.open(w, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
            Query query = new ConstantScoreQuery(new TermQuery(new Term("x", new BytesRef("Arthur"))));
            LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, true);
            KeyIterable<ShardId, Row> result = collector.collect();

            // Only two rows come back from the first collect() call.
            assertThat(StreamSupport.stream(result.spliterator(), false).count(), is(2L));

            Iterator<Row> values = result.iterator();
            assertThat(values.next().get(0), Matchers.is(1.0F));
            assertThat(values.next().get(0), Matchers.is(1.0F));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) FieldType(org.apache.lucene.document.FieldType) ShardId(org.elasticsearch.index.shard.ShardId) IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) Row(io.crate.data.Row) ScoreCollectorExpression(io.crate.expression.reference.doc.lucene.ScoreCollectorExpression) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest) Test(org.junit.Test)

Example 2 with LuceneCollectorExpression

use of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in project crate by crate.

the class LuceneOrderedDocCollectorTest method testSearchMoreAppliesMinScoreFilter.

/**
 * Checks that the minimum-score filter is applied both to the initial search and to the
 * follow-up ("searchMore") batch of a {@link LuceneOrderedDocCollector}.
 *
 * <p>Three documents with the value "Arthur" and one with "Arthurr" are indexed and matched
 * with a {@link FuzzyQuery}. With a min score of {@code 0.15f} the first {@code collect()}
 * call must return two rows and the second only one, and every returned score must be at
 * least {@code 0.15f}.
 *
 * <p>The directory, writer and reader are opened in try-with-resources so they are released
 * even when an assertion fails.
 */
@Test
public void testSearchMoreAppliesMinScoreFilter() throws Exception {
    try (ByteBuffersDirectory directory = new ByteBuffersDirectory();
         IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer()))) {
        var fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;
        for (int i = 0; i < 3; i++) {
            addDoc(w, "x", fieldType, "Arthur");
        }
        // not "Arthur" to lower score
        addDoc(w, "x", fieldType, "Arthurr");
        w.commit();

        // Reader opened from the writer; it is closed before the writer when the block exits.
        try (DirectoryReader reader = DirectoryReader.open(w, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
            Query query = new FuzzyQuery(new Term("x", "Arthur"), Fuzziness.AUTO.asDistance("Arthur"), 2, 3, true);
            LuceneOrderedDocCollector collector;

            // without minScore filter we get 2 and 2 docs - this is not necessary for the test but is here
            // to make sure the "FuzzyQuery" matches the right documents
            collector = collector(searcher, columnReferences, query, null, true);
            assertThat(StreamSupport.stream(collector.collect().spliterator(), false).count(), is(2L));
            assertThat(StreamSupport.stream(collector.collect().spliterator(), false).count(), is(2L));

            collector = collector(searcher, columnReferences, query, 0.15f, true);
            int count = 0;
            // initialSearch -> 2 rows
            for (Row row : collector.collect()) {
                assertThat((float) row.get(0), Matchers.greaterThanOrEqualTo(0.15f));
                count++;
            }
            assertThat(count, is(2));

            count = 0;
            // searchMore -> 1 row is below minScore
            for (Row row : collector.collect()) {
                assertThat((float) row.get(0), Matchers.greaterThanOrEqualTo(0.15f));
                count++;
            }
            assertThat(count, is(1));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) Query(org.apache.lucene.search.Query) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) Row(io.crate.data.Row) ScoreCollectorExpression(io.crate.expression.reference.doc.lucene.ScoreCollectorExpression) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest) Test(org.junit.Test)

Example 3 with LuceneCollectorExpression

use of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in project crate by crate.

the class LuceneOrderedDocCollectorTest method testSearchNoScores.

/**
 * Checks that a {@link ScoreCollectorExpression} yields {@code Float.NaN} for each collected
 * row when the collector is built with the trailing {@code false} flag (presumably "do not
 * track scores" — confirm against the {@code collector(...)} helper).
 *
 * <p>Four documents with the keyword value "Arthur" are indexed and searched with a plain
 * {@link TermQuery}; the first {@code collect()} call is expected to return two rows.
 *
 * <p>The directory, writer and reader are opened in try-with-resources so they are released
 * even when an assertion fails.
 */
@Test
public void testSearchNoScores() throws Exception {
    try (ByteBuffersDirectory directory = new ByteBuffersDirectory();
         IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer()))) {
        String name = "x";
        var fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;
        for (int i = 0; i < 3; i++) {
            addDoc(w, name, fieldType, "Arthur");
        }
        // NOTE(review): the previous comment claimed this value is not "Arthur" (to lower its
        // score), but the value indexed here is exactly "Arthur" — confirm the intent. The
        // assertions below hold either way as long as at least two documents match.
        addDoc(w, name, fieldType, "Arthur");
        w.commit();

        // Reader opened from the writer; it is closed before the writer when the block exits.
        try (DirectoryReader reader = DirectoryReader.open(w, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
            Query query = new TermQuery(new Term(name, new BytesRef("Arthur")));
            LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, false);
            KeyIterable<ShardId, Row> result = collector.collect();

            // Only two rows come back from the first collect() call.
            assertThat(StreamSupport.stream(result.spliterator(), false).count(), is(2L));

            Iterator<Row> values = result.iterator();
            assertThat(values.next().get(0), Matchers.is(Float.NaN));
            assertThat(values.next().get(0), Matchers.is(Float.NaN));
        }
    }
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) KeywordAnalyzer(org.apache.lucene.analysis.core.KeywordAnalyzer) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) FuzzyQuery(org.apache.lucene.search.FuzzyQuery) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) ConstantScoreQuery(org.apache.lucene.search.ConstantScoreQuery) TermQuery(org.apache.lucene.search.TermQuery) Term(org.apache.lucene.index.Term) ShardId(org.elasticsearch.index.shard.ShardId) IndexWriter(org.apache.lucene.index.IndexWriter) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) Row(io.crate.data.Row) ScoreCollectorExpression(io.crate.expression.reference.doc.lucene.ScoreCollectorExpression) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) BytesRef(org.apache.lucene.util.BytesRef) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) RandomizedTest(com.carrotsearch.randomizedtesting.RandomizedTest) Test(org.junit.Test)

Example 4 with LuceneCollectorExpression

use of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in project crate by crate.

the class OrderedLuceneBatchIteratorFactoryTest method createOrderedCollector.

/**
 * Builds a {@link LuceneOrderedDocCollector} that matches all documents of the given searcher
 * and sorts them on {@code columnName} as a LONG field, using {@code reverseFlags[0]} as the
 * reverse flag.
 *
 * @param searcher searcher the collector runs against
 * @param shardId  numeric id used for the dummy {@link ShardId} of the collector
 * @return a collector using the same order-by expression list for both trailing arguments
 */
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, int shardId) {
    CollectorContext context = new CollectorContext();
    List<LuceneCollectorExpression<?>> orderByExpressions =
        Collections.singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o));

    ShardId dummyShard = new ShardId("dummy", UUIDs.randomBase64UUID(), shardId);
    MatchAllDocsQuery matchAll = new MatchAllDocsQuery();
    // batchSize < 10 to have at least one searchMore call.
    int batchSize = 5;
    Sort sort = new Sort(new SortedNumericSortField(columnName, SortField.Type.LONG, reverseFlags[0]));

    return new LuceneOrderedDocCollector(
        dummyShard,
        searcher,
        matchAll,
        null,
        false,
        batchSize,
        RamAccounting.NO_ACCOUNTING,
        context,
        f -> null,
        sort,
        orderByExpressions,
        orderByExpressions);
}
Also used : ShardId(org.elasticsearch.index.shard.ShardId) ArgumentMatchers.any(org.mockito.ArgumentMatchers.any) Arrays(java.util.Arrays) RowAccounting(io.crate.breaker.RowAccounting) LongType(io.crate.types.LongType) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) TestingHelpers.createReference(io.crate.testing.TestingHelpers.createReference) RowAccountingWithEstimators(io.crate.breaker.RowAccountingWithEstimators) BatchIterator(io.crate.data.BatchIterator) BatchIteratorTester(io.crate.testing.BatchIteratorTester) AtomicReference(java.util.concurrent.atomic.AtomicReference) Mockito.doThrow(org.mockito.Mockito.doThrow) Document(org.apache.lucene.document.Document) ByteBuffersDirectory(org.apache.lucene.store.ByteBuffersDirectory) ESTestCase(org.elasticsearch.test.ESTestCase) SortField(org.apache.lucene.search.SortField) OrderingByPosition(io.crate.execution.engine.sort.OrderingByPosition) Before(org.junit.Before) NumericDocValuesField(org.apache.lucene.document.NumericDocValuesField) LongStream(java.util.stream.LongStream) Sort(org.apache.lucene.search.Sort) DirectoryReader(org.apache.lucene.index.DirectoryReader) Reference(io.crate.metadata.Reference) UUIDs(org.elasticsearch.common.UUIDs) Test(org.junit.Test) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) RamAccounting(io.crate.breaker.RamAccounting) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) Collectors(java.util.stream.Collectors) TimeUnit(java.util.concurrent.TimeUnit) CountDownLatch(java.util.concurrent.CountDownLatch) IndexWriter(org.apache.lucene.index.IndexWriter) List(java.util.List) OrderBy(io.crate.analyze.OrderBy) StandardAnalyzer(org.apache.lucene.analysis.standard.StandardAnalyzer) Row(io.crate.data.Row) CircuitBreakingException(org.elasticsearch.common.breaker.CircuitBreakingException) OrderByCollectorExpression(io.crate.expression.reference.doc.lucene.OrderByCollectorExpression) 
DataTypes(io.crate.types.DataTypes) Matchers.is(org.hamcrest.Matchers.is) TestingRowConsumer(io.crate.testing.TestingRowConsumer) Comparator(java.util.Comparator) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig) Collections(java.util.Collections) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) IndexSearcher(org.apache.lucene.search.IndexSearcher) Mockito.mock(org.mockito.Mockito.mock) ShardId(org.elasticsearch.index.shard.ShardId) SortedNumericSortField(org.apache.lucene.search.SortedNumericSortField) OrderByCollectorExpression(io.crate.expression.reference.doc.lucene.OrderByCollectorExpression) Sort(org.apache.lucene.search.Sort) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) MatchAllDocsQuery(org.apache.lucene.search.MatchAllDocsQuery) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression)

Example 5 with LuceneCollectorExpression

use of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in project crate by crate.

the class DocValuesGroupByOptimizedIterator method tryOptimize.

/**
 * Tries to build a group-by iterator via {@code GroupByIterator} from doc-value aggregators
 * for the given collect phase.
 *
 * <p>{@code null} is returned (no optimization) when any of the following holds:
 * <ul>
 *   <li>the {@code _score} system column appears in the phase's outputs or where clause,</li>
 *   <li>{@code getSinglePartialGroupProjection} finds no projection among the shard projections,</li>
 *   <li>a group key cannot be resolved through {@code getKeyRef},</li>
 *   <li>a key column has no field type in {@code fieldTypeLookup} or that type has no doc values,</li>
 *   <li>{@code DocValuesAggregates.createAggregators} returns {@code null}.</li>
 * </ul>
 *
 * <p>Only after all of these checks pass is a searcher acquired from the shared shard context
 * and handed to {@code collectTask} via {@code addSearcher}.
 *
 * @return the group-by iterator, or {@code null} if the phase does not qualify
 */
@Nullable
static BatchIterator<Row> tryOptimize(Functions functions, IndexShard indexShard, DocTableInfo table, LuceneQueryBuilder luceneQueryBuilder, FieldTypeLookup fieldTypeLookup, DocInputFactory docInputFactory, RoutedCollectPhase collectPhase, CollectTask collectTask) {
    // Any use of _score, in the outputs or in the filter, rules the optimization out.
    if (Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE)) {
        return null;
    }
    if (Symbols.containsColumn(collectPhase.where(), DocSysColumns.SCORE)) {
        return null;
    }

    Collection<? extends Projection> projectionsOnShard = shardProjections(collectPhase.projections());
    GroupProjection grouping = getSinglePartialGroupProjection(projectionsOnShard);
    if (grouping == null) {
        return null;
    }

    // Resolve every group key to a column reference whose field has doc values.
    ArrayList<Reference> groupKeyColumns = new ArrayList<>(grouping.keys().size());
    for (var groupKey : grouping.keys()) {
        var keyDocRef = getKeyRef(collectPhase.toCollect(), groupKey);
        if (keyDocRef == null) {
            // group by on non-reference
            return null;
        }
        var keyColumn = (Reference) DocReferences.inverseSourceLookup(keyDocRef);
        var keyMappedType = fieldTypeLookup.get(keyColumn.column().fqn());
        if (keyMappedType == null) {
            return null;
        }
        if (!keyMappedType.hasDocValues()) {
            return null;
        }
        groupKeyColumns.add(keyColumn);
    }

    // noinspection rawtypes
    List<DocValueAggregator> docValueAggregators = DocValuesAggregates.createAggregators(functions, grouping.values(), collectPhase.toCollect(), collectTask.txnCtx().sessionSettings().searchPath(), table);
    if (docValueAggregators == null) {
        return null;
    }

    // From here on the optimization applies: acquire the searcher and register it on the task.
    ShardId shardId = indexShard.shardId();
    SharedShardContext shardContext = collectTask.sharedShardContexts().getOrCreateContext(shardId);
    var searcher = shardContext.acquireSearcher("group-by-doc-value-aggregates: " + formatSource(collectPhase));
    collectTask.addSearcher(shardContext.readerId(), searcher);
    QueryShardContext queryShardContext = shardContext.indexService().newQueryShardContext();

    // One collector expression per group key, in key order.
    InputFactory.Context<? extends LuceneCollectorExpression<?>> inputCtx = docInputFactory.getCtx(collectTask.txnCtx());
    List<LuceneCollectorExpression<?>> keyExpressions = new ArrayList<>();
    for (var keyColumn : groupKeyColumns) {
        keyExpressions.add((LuceneCollectorExpression<?>) inputCtx.add(keyColumn));
    }

    LuceneQueryBuilder.Context luceneQuery = luceneQueryBuilder.convert(collectPhase.where(), collectTask.txnCtx(), indexShard.mapperService(), indexShard.shardId().getIndexName(), queryShardContext, table, shardContext.indexService().cache());

    if (groupKeyColumns.size() != 1) {
        return GroupByIterator.forManyKeys(docValueAggregators, searcher.item(), groupKeyColumns, keyExpressions, collectTask.getRamAccounting(), collectTask.memoryManager(), collectTask.minNodeVersion(), luceneQuery.query(), new CollectorContext(shardContext.readerId()));
    }
    return GroupByIterator.forSingleKey(docValueAggregators, searcher.item(), groupKeyColumns.get(0), keyExpressions, collectTask.getRamAccounting(), collectTask.memoryManager(), collectTask.minNodeVersion(), luceneQuery.query(), new CollectorContext(shardContext.readerId()));
}
Also used : InputFactory(io.crate.expression.InputFactory) DocValueAggregator(io.crate.execution.engine.aggregation.DocValueAggregator) AtomicReference(java.util.concurrent.atomic.AtomicReference) Reference(io.crate.metadata.Reference) ArrayList(java.util.ArrayList) ShardId(org.elasticsearch.index.shard.ShardId) LuceneQueryBuilder(io.crate.lucene.LuceneQueryBuilder) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) CollectorContext(io.crate.expression.reference.doc.lucene.CollectorContext) GroupProjection(io.crate.execution.dsl.projection.GroupProjection) SharedShardContext(io.crate.execution.jobs.SharedShardContext) LuceneCollectorExpression(io.crate.expression.reference.doc.lucene.LuceneCollectorExpression) Nullable(javax.annotation.Nullable)

Aggregations

LuceneCollectorExpression (io.crate.expression.reference.doc.lucene.LuceneCollectorExpression): 5
Row (io.crate.data.Row): 4
IndexWriter (org.apache.lucene.index.IndexWriter): 4
IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig): 4
IndexSearcher (org.apache.lucene.search.IndexSearcher): 4
MatchAllDocsQuery (org.apache.lucene.search.MatchAllDocsQuery): 4
ByteBuffersDirectory (org.apache.lucene.store.ByteBuffersDirectory): 4
ShardId (org.elasticsearch.index.shard.ShardId): 4
Test (org.junit.Test): 4
RandomizedTest (com.carrotsearch.randomizedtesting.RandomizedTest): 3
ScoreCollectorExpression (io.crate.expression.reference.doc.lucene.ScoreCollectorExpression): 3
KeywordAnalyzer (org.apache.lucene.analysis.core.KeywordAnalyzer): 3
Term (org.apache.lucene.index.Term): 3
ConstantScoreQuery (org.apache.lucene.search.ConstantScoreQuery): 3
FuzzyQuery (org.apache.lucene.search.FuzzyQuery): 3
Query (org.apache.lucene.search.Query): 3
TermQuery (org.apache.lucene.search.TermQuery): 3
CollectorContext (io.crate.expression.reference.doc.lucene.CollectorContext): 2
Reference (io.crate.metadata.Reference): 2
AtomicReference (java.util.concurrent.atomic.AtomicReference): 2