use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
the class TestSuggestField method testRandom.
/**
 * Randomized comparison of prefix suggestions against a brute-force model.
 *
 * <p>Indexes a random number of documents whose suggestion keys are drawn from a small random
 * key set. For each random prefix, the expected result is computed by scanning every indexed
 * entry, sorting by descending weight (ties broken by ascending doc id) and optionally
 * de-duplicating by key; it is then compared with what {@link SuggestIndexSearcher} collects
 * through a {@link TopSuggestDocsCollector}.
 *
 * <p>The writer and reader are managed with try-with-resources so that both are closed even
 * when an assertion fails part-way through.
 *
 * @throws Exception if indexing or searching fails
 */
public void testRandom() throws Exception {
  int numDigits = TestUtil.nextInt(random(), 1, 6);
  Set<String> keys = new HashSet<>();
  int keyCount = TestUtil.nextInt(random(), 1, 20);
  if (numDigits == 1) {
    // NOTE(review): presumably fewer distinct keys can be generated when numDigits is 1, so the
    // cap keeps the fill loop below from spinning forever -- confirm against randomSimpleString.
    keyCount = Math.min(9, keyCount);
  }
  while (keys.size() < keyCount) {
    keys.add(randomSimpleString(numDigits, 10));
  }
  List<String> keysList = new ArrayList<>(keys);

  Analyzer analyzer = new MockAnalyzer(random());
  IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");
  // we rely on docID order:
  iwc.setMergePolicy(newLogMergePolicy());

  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwc)) {
    int docCount = TestUtil.nextInt(random(), 1, 200);
    Entry[] docs = new Entry[docCount];
    for (int i = 0; i < docCount; i++) {
      int weight = random().nextInt(40);
      String key = keysList.get(random().nextInt(keyCount));
      // the loop index is recorded as the entry's id and used for tie-breaking below
      docs[i] = new Entry(key, null, weight, i);
      Document doc = new Document();
      doc.add(new SuggestField("suggest_field", key, weight));
      iw.addDocument(doc);
      if (usually()) {
        iw.commit();
      }
    }

    // Ordering used by the brute-force model; stateless, so built once outside the loop.
    Comparator<Entry> byScoreThenDocId = new Comparator<Entry>() {
      @Override
      public int compare(Entry a, Entry b) {
        // sort by higher score:
        int cmp = Float.compare(b.value, a.value);
        if (cmp == 0) {
          // tie break by smaller docID:
          cmp = Integer.compare(a.id, b.id);
        }
        return cmp;
      }
    };

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
      int iters = atLeast(200);
      for (int iter = 0; iter < iters; iter++) {
        String prefix = randomSimpleString(numDigits, 2);
        if (VERBOSE) {
          System.out.println("\nTEST: prefix=" + prefix);
        }

        // slow but hopefully correct suggester:
        List<Entry> expected = new ArrayList<>();
        for (Entry doc : docs) {
          if (doc.output.startsWith(prefix)) {
            expected.add(doc);
          }
        }
        Collections.sort(expected, byScoreThenDocId);

        boolean dedup = random().nextBoolean();
        if (dedup) {
          // keep only the first (i.e. best-ranked) entry for each distinct key
          List<Entry> deduped = new ArrayList<>();
          Set<String> seen = new HashSet<>();
          for (Entry entry : expected) {
            if (seen.add(entry.output)) {
              deduped.add(entry);
            }
          }
          expected = deduped;
        }

        // TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
        //int topN = TestUtil.nextInt(random(), 1, docCount+10);
        int topN = docCount;

        if (VERBOSE) {
          if (dedup) {
            System.out.println(" expected (dedup'd) topN=" + topN + ":");
          } else {
            System.out.println(" expected topN=" + topN + ":");
          }
          for (int i = 0; i < expected.size(); i++) {
            if (i >= topN) {
              System.out.println(" leftover: " + i + ": " + expected.get(i));
            } else {
              System.out.println(" " + i + ": " + expected.get(i));
            }
          }
        }
        expected = expected.subList(0, Math.min(topN, expected.size()));

        PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
        TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
        searcher.suggest(query, collector);
        TopSuggestDocs actual = collector.get();
        if (VERBOSE) {
          System.out.println(" actual:");
          SuggestScoreDoc[] suggestScoreDocs = (SuggestScoreDoc[]) actual.scoreDocs;
          for (int i = 0; i < suggestScoreDocs.length; i++) {
            System.out.println(" " + i + ": " + suggestScoreDocs[i]);
          }
        }

        assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
      }
    }
  }
}
use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
the class TestSuggestField method testSuggestOnAllFilteredDocuments.
/**
 * Verifies that a prefix completion query whose filter accepts no documents reports zero hits.
 *
 * <p>Every indexed document carries a suggestion starting with {@code "abc_"}, but the supplied
 * {@link BitsProducer} returns {@link Bits.MatchNoBits} for every segment, so nothing may be
 * suggested. The writer and reader are closed via try-with-resources even if the assertion fails.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testSuggestOnAllFilteredDocuments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(10));
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      document.add(new SuggestField("suggest_field", "abc_" + i, i));
      document.add(newStringField("str_fld", "deleted", Field.Store.NO));
      iw.addDocument(document);
      if (usually()) {
        iw.commit();
      }
    }

    // Filter that rejects every document of every segment.
    BitsProducer filter = new BitsProducer() {
      @Override
      public Bits getBits(LeafReaderContext context) throws IOException {
        return new Bits.MatchNoBits(context.reader().maxDoc());
      }
    };

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      // no random access required;
      // calling suggest with filter that does not match any documents should early terminate
      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"), filter);
      TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
      assertThat(suggest.totalHits, equalTo(0));
    }
  }
}
use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
the class TestSuggestField method testScoring.
/**
 * Verifies that suggestion scores equal the indexed weights and arrive in non-increasing order.
 *
 * <p>Each document gets a unique suggestion (one of three prefixes, a random string and the
 * document index) with a random non-negative weight. For every prefix, each returned suggestion
 * must be one that was indexed and not yet returned, its score must equal its weight, and scores
 * must never increase. At the end every indexed suggestion must have been returned exactly once.
 *
 * <p>The writer and reader are closed via try-with-resources even if an assertion fails.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testScoring() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(100));
    String[] prefixes = { "abc", "bac", "cab" };
    // suggestion text -> indexed weight; entries are removed as they are matched below
    Map<String, Integer> mappings = new HashMap<>();
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      // the trailing "_" + i makes every suggestion unique
      String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" + i;
      int weight = random().nextInt(Integer.MAX_VALUE);
      document.add(new SuggestField("suggest_field", suggest, weight));
      mappings.put(suggest, weight);
      iw.addDocument(document);
      if (usually()) {
        iw.commit();
      }
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      for (String prefix : prefixes) {
        PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
        TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
        assertTrue(suggest.totalHits > 0);

        // Start above any possible score so the first comparison always passes.
        float previousScore = Float.POSITIVE_INFINITY;
        for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
          assertTrue(previousScore >= scoreDoc.score);
          previousScore = scoreDoc.score;

          // Remove first and null-check before unboxing, so an unknown or repeated key fails
          // with a descriptive assertion instead of a NullPointerException.
          String key = scoreDoc.key.toString();
          Integer expectedWeight = mappings.remove(key);
          assertNotNull("unexpected or duplicate suggestion: " + key, expectedWeight);
          assertThat(expectedWeight.floatValue(), equalTo(scoreDoc.score));
        }
      }
      // every indexed suggestion must have been returned by one of the prefix queries
      assertThat(mappings.size(), equalTo(0));
    }
  }
}
use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
the class TestSuggestField method testNRTDeletedDocFiltering.
/**
 * Verifies that documents deleted through the writer are not suggested by a near-real-time reader.
 *
 * <p>Documents at even indices are tagged {@code "delete"} and removed with
 * {@link IndexWriter#deleteDocuments}; documents at odd indices are tagged {@code "no_delete"}
 * and recorded as the expected suggestions. The reader is opened directly from the writer, and
 * both are closed via try-with-resources even if the assertion fails.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testNRTDeletedDocFiltering() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  // using IndexWriter instead of RandomIndexWriter
  try (IndexWriter iw = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(10));
    int numLive = 0;
    List<Entry> expectedEntries = new ArrayList<>();
    for (int i = 0; i < num; i++) {
      Document document = new Document();
      // weight decreases with i, so expectedEntries is built in descending-weight order
      document.add(new SuggestField("suggest_field", "abc_" + i, num - i));
      if (i % 2 == 0) {
        document.add(newStringField("str_field", "delete", Field.Store.YES));
      } else {
        numLive++;
        expectedEntries.add(new Entry("abc_" + i, num - i));
        document.add(newStringField("str_field", "no_delete", Field.Store.YES));
      }
      iw.addDocument(document);
      if (usually()) {
        iw.commit();
      }
    }

    iw.deleteDocuments(new Term("str_field", "delete"));

    // NRT reader opened from the writer, after the deletes above were issued
    try (DirectoryReader reader = DirectoryReader.open(iw)) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
      TopSuggestDocs suggest = indexSearcher.suggest(query, numLive, false);
      assertSuggestions(suggest, expectedEntries.toArray(new Entry[expectedEntries.size()]));
    }
  }
}
use of org.apache.lucene.index.DirectoryReader in project lucene-solr by apache.
the class TestSuggestField method testMultipleSegments.
/**
 * Verifies suggestions when only some of the indexed documents carry the suggest field.
 *
 * <p>Every document stores {@code weight_fld}; roughly one in four also gets a suggestion
 * {@code "abc_" + i} with weight {@code i}. Because {@code i} counts down, the recorded entries
 * are in descending-weight order. The writer and reader are closed via try-with-resources even
 * if the assertion fails.
 *
 * @throws Exception if indexing or searching fails
 */
@Test
public void testMultipleSegments() throws Exception {
  Analyzer analyzer = new MockAnalyzer(random());
  try (RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"))) {
    int num = Math.min(1000, atLeast(10));
    List<Entry> entries = new ArrayList<>();

    // ensure at least some segments have no suggest field
    for (int i = num; i > 0; i--) {
      Document document = new Document();
      if (random().nextInt(4) == 1) {
        document.add(new SuggestField("suggest_field", "abc_" + i, i));
        entries.add(new Entry("abc_" + i, i));
      }
      document.add(new StoredField("weight_fld", i));
      iw.addDocument(document);
      if (usually()) {
        iw.commit();
      }
    }

    try (DirectoryReader reader = iw.getReader()) {
      SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
      PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", "abc_"));
      // Ask for at least one result even when no document received a suggestion.
      // NOTE(review): presumably a topN of 0 is not accepted by suggest() -- confirm.
      int topN = Math.max(1, entries.size());
      TopSuggestDocs suggest = indexSearcher.suggest(query, topN, false);
      assertSuggestions(suggest, entries.toArray(new Entry[entries.size()]));
    }
  }
}
Aggregations