Usage example of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in the crate project:
class LuceneOrderedDocCollectorTest, method testSearchWithScores.
@Test
public void testSearchWithScores() throws Exception {
// Indexes four "Arthur" docs and collects with scoring enabled (doScores = true):
// the ConstantScoreQuery assigns every matching row the identical score 1.0.
IndexWriter w = new IndexWriter(new ByteBuffersDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
FieldType fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;
for (int i = 0; i < 3; i++) {
addDoc(w, "x", fieldType, "Arthur");
}
// NOTE(review): the previous comment said 'not "Arthur" to lower score', but the value
// added here IS "Arthur" — and with a ConstantScoreQuery every match scores 1.0 anyway,
// so a lowered score is impossible here. Compare testSearchMoreAppliesMinScoreFilter,
// which really does index a differing value ("Arthurr").
addDoc(w, "x", fieldType, "Arthur");
w.commit();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w, true, true));
List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
Query query = new ConstantScoreQuery(new TermQuery(new Term("x", new BytesRef("Arthur"))));
// null minScore, doScores = true. Only 2 of the 4 matching docs appear per collect()
// call — presumably the collector() helper configures a batch size of 2; confirm there.
LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, true);
KeyIterable<ShardId, Row> result = collector.collect();
assertThat(StreamSupport.stream(result.spliterator(), false).count(), is(2L));
Iterator<Row> values = result.iterator();
// ConstantScoreQuery -> each collected row reports a score of exactly 1.0
assertThat(values.next().get(0), Matchers.is(1.0F));
assertThat(values.next().get(0), Matchers.is(1.0F));
}
Usage example of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in the crate project:
class LuceneOrderedDocCollectorTest, method testSearchMoreAppliesMinScoreFilter.
@Test
public void testSearchMoreAppliesMinScoreFilter() throws Exception {
// Verifies that the minScore filter is also applied on subsequent "searchMore"
// (paging) calls: the "Arthurr" doc matches the fuzzy query but with a score
// below 0.15 and must be excluded from the second batch.
IndexWriter w = new IndexWriter(new ByteBuffersDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
var fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;
for (int i = 0; i < 3; i++) {
addDoc(w, "x", fieldType, "Arthur");
}
// not "Arthur" so this doc matches the fuzzy query with a lower score
addDoc(w, "x", fieldType, "Arthurr");
w.commit();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w, true, true));
List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
Query query = new FuzzyQuery(new Term("x", "Arthur"), Fuzziness.AUTO.asDistance("Arthur"), 2, 3, true);
LuceneOrderedDocCollector collector;
// without minScore filter we get 2 and 2 docs - this is not necessary for the test but is here
// to make sure the "FuzzyQuery" matches the right documents
collector = collector(searcher, columnReferences, query, null, true);
assertThat(StreamSupport.stream(collector.collect().spliterator(), false).count(), is(2L));
assertThat(StreamSupport.stream(collector.collect().spliterator(), false).count(), is(2L));
// now with minScore = 0.15f: the low-scoring "Arthurr" doc must be filtered out
collector = collector(searcher, columnReferences, query, 0.15f, true);
int count = 0;
// initialSearch -> 2 rows
for (Row row : collector.collect()) {
assertThat((float) row.get(0), Matchers.greaterThanOrEqualTo(0.15f));
count++;
}
assertThat(count, is(2));
count = 0;
// searchMore -> 1 row, the other is below minScore
for (Row row : collector.collect()) {
assertThat((float) row.get(0), Matchers.greaterThanOrEqualTo(0.15f));
count++;
}
assertThat(count, is(1));
}
Usage example of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in the crate project:
class LuceneOrderedDocCollectorTest, method testSearchNoScores.
@Test
public void testSearchNoScores() throws Exception {
// Verifies that when scoring is disabled (doScores = false) the collector reports
// Float.NaN as the _score value for every matched row.
IndexWriter w = new IndexWriter(new ByteBuffersDirectory(), new IndexWriterConfig(new KeywordAnalyzer()));
String name = "x";
var fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;
for (int i = 0; i < 3; i++) {
addDoc(w, name, fieldType, "Arthur");
}
// a fourth matching "Arthur" doc; scoring is disabled, so its score is NaN like the others
addDoc(w, name, fieldType, "Arthur");
w.commit();
IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(w, true, true));
List<LuceneCollectorExpression<?>> columnReferences = Collections.singletonList(new ScoreCollectorExpression());
Query query = new TermQuery(new Term(name, new BytesRef("Arthur")));
// null minScore, doScores = false -> scores are never computed
LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, false);
KeyIterable<ShardId, Row> result = collector.collect();
assertThat(StreamSupport.stream(result.spliterator(), false).count(), is(2L));
Iterator<Row> values = result.iterator();
assertThat(values.next().get(0), Matchers.is(Float.NaN));
assertThat(values.next().get(0), Matchers.is(Float.NaN));
}
Usage example of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in the crate project:
class OrderedLuceneBatchIteratorFactoryTest, method createOrderedCollector.
private LuceneOrderedDocCollector createOrderedCollector(IndexSearcher searcher, int shardId) {
// Builds a collector over a match-all query, ordered by the long-typed sort column.
CollectorContext collectorContext = new CollectorContext();
List<LuceneCollectorExpression<?>> expressions = Collections.singletonList(new OrderByCollectorExpression(reference, orderBy, o -> o));
ShardId dummyShardId = new ShardId("dummy", UUIDs.randomBase64UUID(), shardId);
Sort sort = new Sort(new SortedNumericSortField(columnName, SortField.Type.LONG, reverseFlags[0]));
return new LuceneOrderedDocCollector(
    dummyShardId,
    searcher,
    new MatchAllDocsQuery(),
    null,                           // no minScore filter
    false,                          // scores are not needed for ordering by column
    5,                              // batchSize < 10 to have at least one searchMore call
    RamAccounting.NO_ACCOUNTING,
    collectorContext,
    f -> null,
    sort,
    expressions,
    expressions);
}
Usage example of io.crate.expression.reference.doc.lucene.LuceneCollectorExpression in the crate project:
class DocValuesGroupByOptimizedIterator, method tryOptimize.
/**
 * Tries to build an optimized GROUP BY iterator that reads both the group keys
 * and the aggregate inputs directly from Lucene doc-values, bypassing the
 * regular row-based collect path.
 *
 * @return the optimized {@link BatchIterator}, or {@code null} when the
 *         optimization does not apply (e.g. _score is referenced, the shard
 *         projections are not a single partial group projection, a key is not
 *         a doc-values-backed column reference, or an aggregation cannot be
 *         computed from doc-values)
 */
@Nullable
static BatchIterator<Row> tryOptimize(Functions functions, IndexShard indexShard, DocTableInfo table, LuceneQueryBuilder luceneQueryBuilder, FieldTypeLookup fieldTypeLookup, DocInputFactory docInputFactory, RoutedCollectPhase collectPhase, CollectTask collectTask) {
// _score is produced by scoring during collection; the doc-values path never scores
if (Symbols.containsColumn(collectPhase.toCollect(), DocSysColumns.SCORE) || Symbols.containsColumn(collectPhase.where(), DocSysColumns.SCORE)) {
return null;
}
// optimization only handles exactly one partial group projection on the shard level
Collection<? extends Projection> shardProjections = shardProjections(collectPhase.projections());
GroupProjection groupProjection = getSinglePartialGroupProjection(shardProjections);
if (groupProjection == null) {
return null;
}
// every group key must resolve to a column reference that has doc-values
ArrayList<Reference> columnKeyRefs = new ArrayList<>(groupProjection.keys().size());
for (var key : groupProjection.keys()) {
var docKeyRef = getKeyRef(collectPhase.toCollect(), key);
if (docKeyRef == null) {
// group by on non-reference
return null;
}
// use the column reference (not the _source lookup) so values come from doc-values
var columnKeyRef = (Reference) DocReferences.inverseSourceLookup(docKeyRef);
var keyFieldType = fieldTypeLookup.get(columnKeyRef.column().fqn());
if (keyFieldType == null || !keyFieldType.hasDocValues()) {
return null;
} else {
columnKeyRefs.add(columnKeyRef);
}
}
// noinspection rawtypes
List<DocValueAggregator> aggregators = DocValuesAggregates.createAggregators(functions, groupProjection.values(), collectPhase.toCollect(), collectTask.txnCtx().sessionSettings().searchPath(), table);
// null -> at least one aggregation cannot be computed from doc-values alone
if (aggregators == null) {
return null;
}
ShardId shardId = indexShard.shardId();
SharedShardContext sharedShardContext = collectTask.sharedShardContexts().getOrCreateContext(shardId);
var searcher = sharedShardContext.acquireSearcher("group-by-doc-value-aggregates: " + formatSource(collectPhase));
// register the acquired searcher with the task — presumably so it is released
// together with the task; confirm in CollectTask#addSearcher
collectTask.addSearcher(sharedShardContext.readerId(), searcher);
QueryShardContext queryShardContext = sharedShardContext.indexService().newQueryShardContext();
// build Lucene expressions for each key so they can be read per-document
InputFactory.Context<? extends LuceneCollectorExpression<?>> docCtx = docInputFactory.getCtx(collectTask.txnCtx());
List<LuceneCollectorExpression<?>> keyExpressions = new ArrayList<>();
for (var keyRef : columnKeyRefs) {
keyExpressions.add((LuceneCollectorExpression<?>) docCtx.add(keyRef));
}
LuceneQueryBuilder.Context queryContext = luceneQueryBuilder.convert(collectPhase.where(), collectTask.txnCtx(), indexShard.mapperService(), indexShard.shardId().getIndexName(), queryShardContext, table, sharedShardContext.indexService().cache());
// single-key grouping has a specialized (cheaper) iterator variant
if (columnKeyRefs.size() == 1) {
return GroupByIterator.forSingleKey(aggregators, searcher.item(), columnKeyRefs.get(0), keyExpressions, collectTask.getRamAccounting(), collectTask.memoryManager(), collectTask.minNodeVersion(), queryContext.query(), new CollectorContext(sharedShardContext.readerId()));
} else {
return GroupByIterator.forManyKeys(aggregators, searcher.item(), columnKeyRefs, keyExpressions, collectTask.getRamAccounting(), collectTask.memoryManager(), collectTask.minNodeVersion(), queryContext.query(), new CollectorContext(sharedShardContext.readerId()));
}
}
Aggregations