Example use of org.apache.lucene.store.ByteBuffersDirectory in the Apache Jena project.
Class AbstractTestDatasetWithLuceneGraphTextIndex, method init.
/**
 * Prepares the {@code dataset} field before each test: a dataset obtained from
 * {@code TDBFactory.createDataset()} wrapped together with a Lucene text index that is
 * backed by a {@link ByteBuffersDirectory}.
 */
@Before
public void init() {
    Dataset baseDataset = TDBFactory.createDataset();

    // Describe how entities are indexed, including the graph field and the primary predicate.
    EntityDefinition entityDefinition = new EntityDefinition("iri", "text");
    entityDefinition.setGraphField("graph");
    entityDefinition.setPrimaryPredicate(RDFS.label);
    // some tests require indexing rdfs:comment
    entityDefinition.set("comment", RDFS.comment.asNode());

    // Build the Lucene-backed text index on top of the ByteBuffersDirectory.
    Directory luceneDirectory = new ByteBuffersDirectory();
    TextIndexConfig indexConfig = new TextIndexConfig(entityDefinition);
    TextIndex textIndex = new TextIndexLucene(luceneDirectory, indexConfig);

    dataset = TextDatasetFactory.create(baseDataset, textIndex);
}
Example use of org.apache.lucene.store.ByteBuffersDirectory in the CrateDB (crate/crate) project.
Class LuceneBatchIteratorBenchmark, method createLuceneBatchIterator.
/**
 * Benchmark setup: writes ten million documents, each carrying a single numeric doc-values
 * field named {@code "x"} whose value is the loop counter, into an index held in a
 * {@link ByteBuffersDirectory}. It then commits, force-merges down to one segment and
 * initialises the {@code indexSearcher}, {@code columnRefs} and {@code collectorContext}
 * fields.
 *
 * @throws Exception if writing to or opening the index fails
 */
@Setup
public void createLuceneBatchIterator() throws Exception {
    final String columnName = "x";
    IndexWriter writer = new IndexWriter(
        new ByteBuffersDirectory(),
        new IndexWriterConfig(new StandardAnalyzer())
    );

    for (int value = 0; value < 10_000_000; value++) {
        Document document = new Document();
        document.add(new NumericDocValuesField(columnName, value));
        writer.addDocument(document);
    }
    writer.commit();
    // Merge down to a single segment; the second argument asks the call to wait for the merge.
    writer.forceMerge(1, true);

    // The reader is opened from the writer and stays open: the searcher is kept in a field.
    indexSearcher = new IndexSearcher(DirectoryReader.open(writer));
    columnRefs = Collections.singletonList(new IntegerColumnReference(columnName));
    collectorContext = new CollectorContext();
}
Example use of org.apache.lucene.store.ByteBuffersDirectory in the CrateDB (crate/crate) project.
Class LuceneOrderedDocCollectorTest, method testSearchWithScores.
/**
 * Indexes three "Arthur" documents plus one "Arthurr" document, runs a
 * {@link ConstantScoreQuery} wrapping an exact term query for "Arthur", and checks that the
 * first {@code collect()} call yields two rows whose first column is {@code 1.0F}.
 *
 * <p>NOTE(review): the trailing {@code true} passed to {@code collector(...)} presumably
 * enables score computation; confirm against the helper.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testSearchWithScores() throws Exception {
    FieldType fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;

    // try-with-resources so the directory, writer and reader are released even when an
    // assertion fails; they are closed in reverse order (reader, writer, directory).
    try (ByteBuffersDirectory directory = new ByteBuffersDirectory();
         IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer()))) {
        for (int i = 0; i < 3; i++) {
            addDoc(w, "x", fieldType, "Arthur");
        }
        // Deliberately not "Arthur": this value differs from the exact term queried below.
        addDoc(w, "x", fieldType, "Arthurr");
        w.commit();

        try (DirectoryReader reader = DirectoryReader.open(w, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<LuceneCollectorExpression<?>> columnReferences =
                Collections.singletonList(new ScoreCollectorExpression());
            Query query = new ConstantScoreQuery(new TermQuery(new Term("x", new BytesRef("Arthur"))));
            LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, true);
            KeyIterable<ShardId, Row> result = collector.collect();

            assertThat(StreamSupport.stream(result.spliterator(), false).count(), is(2L));

            Iterator<Row> values = result.iterator();
            assertThat(values.next().get(0), Matchers.is(1.0F));
            assertThat(values.next().get(0), Matchers.is(1.0F));
        }
    }
}
Example use of org.apache.lucene.store.ByteBuffersDirectory in the CrateDB (crate/crate) project.
Class LuceneOrderedDocCollectorTest, method testSearchMoreAppliesMinScoreFilter.
/**
 * Indexes three "Arthur" documents plus one "Arthurr" document and runs a {@link FuzzyQuery}
 * for "Arthur" through two collectors: one created with {@code null} as the fourth argument
 * and one created with {@code 0.15f}. For the second collector every returned row must have
 * a first column of at least {@code 0.15f}, and the second {@code collect()} call must
 * return one row instead of two.
 *
 * <p>NOTE(review): the fourth argument of {@code collector(...)} is presumably the minimum
 * score; confirm against the helper.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testSearchMoreAppliesMinScoreFilter() throws Exception {
    var fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;

    // try-with-resources so the directory, writer and reader are released even when an
    // assertion fails; they are closed in reverse order (reader, writer, directory).
    try (ByteBuffersDirectory directory = new ByteBuffersDirectory();
         IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer()))) {
        for (int i = 0; i < 3; i++) {
            addDoc(w, "x", fieldType, "Arthur");
        }
        // not "Arthur" to lower score
        addDoc(w, "x", fieldType, "Arthurr");
        w.commit();

        try (DirectoryReader reader = DirectoryReader.open(w, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<LuceneCollectorExpression<?>> columnReferences =
                Collections.singletonList(new ScoreCollectorExpression());
            Query query = new FuzzyQuery(new Term("x", "Arthur"), Fuzziness.AUTO.asDistance("Arthur"), 2, 3, true);
            LuceneOrderedDocCollector collector;

            // without minScore filter we get 2 and 2 docs - this is not necessary for the test but is here
            // to make sure the "FuzzyQuery" matches the right documents
            collector = collector(searcher, columnReferences, query, null, true);
            assertThat(StreamSupport.stream(collector.collect().spliterator(), false).count(), is(2L));
            assertThat(StreamSupport.stream(collector.collect().spliterator(), false).count(), is(2L));

            collector = collector(searcher, columnReferences, query, 0.15f, true);
            int count = 0;
            // initialSearch -> 2 rows
            for (Row row : collector.collect()) {
                assertThat((float) row.get(0), Matchers.greaterThanOrEqualTo(0.15f));
                count++;
            }
            assertThat(count, is(2));

            count = 0;
            // searchMore -> 1 row is below minScore
            for (Row row : collector.collect()) {
                assertThat((float) row.get(0), Matchers.greaterThanOrEqualTo(0.15f));
                count++;
            }
            assertThat(count, is(1));
        }
    }
}
Example use of org.apache.lucene.store.ByteBuffersDirectory in the CrateDB (crate/crate) project.
Class LuceneOrderedDocCollectorTest, method testSearchNoScores.
/**
 * Indexes three "Arthur" documents plus one "Arthurr" document, runs an exact
 * {@link TermQuery} for "Arthur", and checks that the first {@code collect()} call yields
 * two rows whose first column is {@code Float.NaN}.
 *
 * <p>NOTE(review): the trailing {@code false} passed to {@code collector(...)} presumably
 * disables score computation, which would explain the NaN values; confirm against the helper.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testSearchNoScores() throws Exception {
    String name = "x";
    var fieldType = KeywordFieldMapper.Defaults.FIELD_TYPE;

    // try-with-resources so the directory, writer and reader are released even when an
    // assertion fails; they are closed in reverse order (reader, writer, directory).
    try (ByteBuffersDirectory directory = new ByteBuffersDirectory();
         IndexWriter w = new IndexWriter(directory, new IndexWriterConfig(new KeywordAnalyzer()))) {
        for (int i = 0; i < 3; i++) {
            addDoc(w, name, fieldType, "Arthur");
        }
        // Deliberately not "Arthur": this value differs from the exact term queried below.
        addDoc(w, name, fieldType, "Arthurr");
        w.commit();

        try (DirectoryReader reader = DirectoryReader.open(w, true, true)) {
            IndexSearcher searcher = new IndexSearcher(reader);
            List<LuceneCollectorExpression<?>> columnReferences =
                Collections.singletonList(new ScoreCollectorExpression());
            Query query = new TermQuery(new Term(name, new BytesRef("Arthur")));
            LuceneOrderedDocCollector collector = collector(searcher, columnReferences, query, null, false);
            KeyIterable<ShardId, Row> result = collector.collect();

            assertThat(StreamSupport.stream(result.spliterator(), false).count(), is(2L));

            Iterator<Row> values = result.iterator();
            assertThat(values.next().get(0), Matchers.is(Float.NaN));
            assertThat(values.next().get(0), Matchers.is(Float.NaN));
        }
    }
}
Aggregations