Search in sources :

Example 46 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

From the class TestSuggestField, method testRandom.

/**
 * Randomized comparison of {@code PrefixCompletionQuery} results against a brute-force
 * reference computed in memory.
 *
 * <p>A random pool of keys is indexed into {@code suggest_field} with random weights. Then, for
 * many random prefixes, the expected result is built by filtering the indexed entries on the
 * prefix, sorting by descending weight with ascending doc id as tie break, and optionally
 * keeping only the first entry per key. That expectation is checked against what the searcher
 * collects.
 */
public void testRandom() throws Exception {
    final int numDigits = TestUtil.nextInt(random(), 1, 6);
    final int requestedKeys = TestUtil.nextInt(random(), 1, 20);
    // NOTE(review): presumably a numDigits of 1 cannot produce many distinct keys, hence the
    // cap at 9 -- confirm against randomSimpleString.
    final int keyCount = (numDigits == 1) ? Math.min(9, requestedKeys) : requestedKeys;

    // Keep generating until enough distinct keys exist.
    Set<String> uniqueKeys = new HashSet<>();
    while (uniqueKeys.size() < keyCount) {
        uniqueKeys.add(randomSimpleString(numDigits, 10));
    }
    List<String> keyPool = new ArrayList<>(uniqueKeys);

    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig config = iwcWithSuggestField(analyzer, "suggest_field");
    // we rely on docID order:
    config.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, config);

    final int docCount = TestUtil.nextInt(random(), 1, 200);
    Entry[] indexed = new Entry[docCount];
    for (int docId = 0; docId < docCount; docId++) {
        int weight = random().nextInt(40);
        String key = keyPool.get(random().nextInt(keyCount));
        // Remember what was indexed so the reference result can be derived later.
        indexed[docId] = new Entry(key, null, weight, docId);
        Document document = new Document();
        document.add(new SuggestField("suggest_field", key, weight));
        writer.addDocument(document);
        if (usually()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);

    // Reference ordering: higher score first, smaller docID breaks ties.
    Comparator<Entry> byScoreThenDocId = new Comparator<Entry>() {

        @Override
        public int compare(Entry left, Entry right) {
            int byScore = Float.compare(right.value, left.value);
            return (byScore != 0) ? byScore : Integer.compare(left.id, right.id);
        }
    };

    final int iters = atLeast(200);
    for (int round = 0; round < iters; round++) {
        String prefix = randomSimpleString(numDigits, 2);
        if (VERBOSE) {
            System.out.println("\nTEST: prefix=" + prefix);
        }

        // slow but hopefully correct suggester:
        List<Entry> expected = new ArrayList<>();
        for (Entry candidate : indexed) {
            if (candidate.output.startsWith(prefix)) {
                expected.add(candidate);
            }
        }
        Collections.sort(expected, byScoreThenDocId);

        final boolean dedup = random().nextBoolean();
        if (dedup) {
            // Keep only the best-ranked entry for each distinct output.
            Set<String> seenOutputs = new HashSet<>();
            List<Entry> firstPerOutput = new ArrayList<>();
            for (Entry candidate : expected) {
                if (seenOutputs.add(candidate.output)) {
                    firstPerOutput.add(candidate);
                }
            }
            expected = firstPerOutput;
        }

        // TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
        //int topN = TestUtil.nextInt(random(), 1, docCount+10);
        final int topN = docCount;

        if (VERBOSE) {
            String header = dedup ? "  expected (dedup'd) topN=" : "  expected topN=";
            System.out.println(header + topN + ":");
            for (int rank = 0; rank < expected.size(); rank++) {
                String label = (rank >= topN) ? "    leftover: " : "    ";
                System.out.println(label + rank + ": " + expected.get(rank));
            }
        }

        int keep = Math.min(topN, expected.size());
        expected = expected.subList(0, keep);

        Term prefixTerm = new Term("suggest_field", prefix);
        PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, prefixTerm);
        TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
        searcher.suggest(query, collector);
        TopSuggestDocs actual = collector.get();

        if (VERBOSE) {
            System.out.println("  actual:");
            SuggestScoreDoc[] hits = (SuggestScoreDoc[]) actual.scoreDocs;
            int rank = 0;
            for (SuggestScoreDoc hit : hits) {
                System.out.println("    " + rank + ": " + hit);
                rank++;
            }
        }

        Entry[] expectedArray = expected.toArray(new Entry[expected.size()]);
        assertSuggestions(actual, expectedArray);
    }

    reader.close();
    writer.close();
}
Also used : ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) HashSet(java.util.HashSet) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) IntPoint(org.apache.lucene.document.IntPoint) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 47 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

From the class TestSuggestField, method testSuggestOnAllFilteredDocuments.

/**
 * Indexes documents that all carry a suggestion starting with {@code abc_}, then queries that
 * prefix through a filter built from {@code Bits.MatchNoBits} and expects zero total hits.
 */
@Test
public void testSuggestOnAllFilteredDocuments() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

    final int docCount = Math.min(1000, atLeast(10));
    for (int weight = 0; weight < docCount; weight++) {
        Document doc = new Document();
        // The loop counter doubles as the suggestion suffix and its weight.
        doc.add(new SuggestField("suggest_field", "abc_" + weight, weight));
        doc.add(newStringField("str_fld", "deleted", Field.Store.NO));
        writer.addDocument(doc);
        if (usually()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);

    // Filter sized to each segment's maxDoc, built from Bits.MatchNoBits.
    BitsProducer matchNothing = new BitsProducer() {

        @Override
        public Bits getBits(LeafReaderContext context) throws IOException {
            int maxDoc = context.reader().maxDoc();
            return new Bits.MatchNoBits(maxDoc);
        }
    };

    // no random access required;
    // calling suggest with filter that does not match any documents should early terminate
    Term prefixTerm = new Term("suggest_field", "abc_");
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, prefixTerm, matchNothing);
    TopSuggestDocs result = searcher.suggest(query, docCount, false);
    assertThat(result.totalHits, equalTo(0));

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) LeafReaderContext(org.apache.lucene.index.LeafReaderContext) BitsProducer(org.apache.lucene.search.suggest.BitsProducer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 48 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

From the class TestSuggestField, method testScoring.

/**
 * Checks that prefix suggestions are returned in non-increasing score order and that every
 * returned score equals the weight its suggestion was indexed with.
 *
 * <p>Each indexed suggestion is recorded in a key-to-weight map. Every hit removes its key from
 * that map, so the final emptiness check fails if any indexed suggestion was never returned,
 * and the per-hit null check fails if a hit is unknown or returned twice.
 */
@Test
public void testScoring() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    int num = Math.min(1000, atLeast(100));
    String[] prefixes = { "abc", "bac", "cab" };
    Map<String, Integer> mappings = new HashMap<>();
    for (int i = 0; i < num; i++) {
        Document document = new Document();
        // Cycle through the prefixes; the trailing "_<i>" keeps every suggestion unique.
        String suggest = prefixes[i % prefixes.length] + TestUtil.randomSimpleString(random(), 10) + "_" + i;
        int weight = random().nextInt(Integer.MAX_VALUE);
        document.add(new SuggestField("suggest_field", suggest, weight));
        mappings.put(suggest, weight);
        iw.addDocument(document);
        if (usually()) {
            iw.commit();
        }
    }
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    for (String prefix : prefixes) {
        PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
        TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
        assertTrue(suggest.totalHits > 0);
        // Starting at +infinity lets the ordering check run on the first hit without a sentinel.
        float previousScore = Float.POSITIVE_INFINITY;
        for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
            assertTrue(previousScore >= scoreDoc.score);
            previousScore = scoreDoc.score;
            String key = scoreDoc.key.toString();
            // Remove before comparing: an unknown or repeated key must fail as a readable
            // assertion rather than as a NullPointerException while unboxing the weight.
            Integer expectedWeight = mappings.remove(key);
            assertNotNull("unexpected or duplicate suggestion: " + key, expectedWeight);
            assertThat(expectedWeight.floatValue(), equalTo(scoreDoc.score));
        }
    }
    // Every indexed suggestion must have been returned by exactly one of the prefix queries.
    assertThat(mappings.size(), equalTo(0));
    reader.close();
    iw.close();
}
Also used : HashMap(java.util.HashMap) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 49 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

From the class TestSuggestField, method testNRTDeletedDocFiltering.

/**
 * Indexes suggestions where every even-numbered document is tagged {@code delete}, deletes the
 * tagged documents, opens a reader directly from the writer, and expects the suggester to
 * return only the odd-numbered (untagged) entries.
 */
@Test
public void testNRTDeletedDocFiltering() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    // using IndexWriter instead of RandomIndexWriter
    IndexWriter writer = new IndexWriter(dir, iwcWithSuggestField(analyzer, "suggest_field"));

    final int total = Math.min(1000, atLeast(10));
    int liveCount = 0;
    List<Entry> survivors = new ArrayList<>();
    for (int i = 0; i < total; i++) {
        String text = "abc_" + i;
        int weight = total - i;
        Document doc = new Document();
        doc.add(new SuggestField("suggest_field", text, weight));

        boolean markedForDeletion = (i % 2 == 0);
        if (!markedForDeletion) {
            liveCount++;
            survivors.add(new Entry(text, weight));
        }
        String marker = markedForDeletion ? "delete" : "no_delete";
        doc.add(newStringField("str_field", marker, Field.Store.YES));

        writer.addDocument(doc);
        if (usually()) {
            writer.commit();
        }
    }

    // Drop every document tagged for deletion before the reader is opened.
    writer.deleteDocuments(new Term("str_field", "delete"));

    DirectoryReader reader = DirectoryReader.open(writer);
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
    Term prefixTerm = new Term("suggest_field", "abc_");
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, prefixTerm);
    TopSuggestDocs result = searcher.suggest(query, liveCount, false);

    Entry[] expected = survivors.toArray(new Entry[survivors.size()]);
    assertSuggestions(result, expected);

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) IndexWriter(org.apache.lucene.index.IndexWriter) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 50 with DirectoryReader

Use of org.apache.lucene.index.DirectoryReader in the lucene-solr project by Apache.

From the class TestSuggestField, method testMultipleSegments.

/**
 * Indexes documents of which only a random subset carries a suggest field, committing at
 * random points, and expects the suggester to return exactly the entries that were given one.
 */
@Test
public void testMultipleSegments() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));

    final int total = Math.min(1000, atLeast(10));
    List<Entry> expected = new ArrayList<>();
    // ensure at least some segments have no suggest field
    for (int weight = total; weight >= 1; weight--) {
        Document doc = new Document();
        boolean withSuggestion = random().nextInt(4) == 1;
        if (withSuggestion) {
            String text = "abc_" + weight;
            doc.add(new SuggestField("suggest_field", text, weight));
            expected.add(new Entry(text, weight));
        }
        doc.add(new StoredField("weight_fld", weight));
        writer.addDocument(doc);
        if (usually()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
    Term prefixTerm = new Term("suggest_field", "abc_");
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, prefixTerm);

    // Request at least one hit even when no document received a suggest field.
    int topN = Math.max(1, expected.size());
    TopSuggestDocs result = searcher.suggest(query, topN, false);
    assertSuggestions(result, expected.toArray(new Entry[expected.size()]));

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Aggregations

DirectoryReader (org.apache.lucene.index.DirectoryReader)362 Document (org.apache.lucene.document.Document)228 Directory (org.apache.lucene.store.Directory)206 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)149 IndexWriter (org.apache.lucene.index.IndexWriter)139 Term (org.apache.lucene.index.Term)134 IndexSearcher (org.apache.lucene.search.IndexSearcher)101 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)98 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)96 Test (org.junit.Test)64 StringField (org.apache.lucene.document.StringField)61 Analyzer (org.apache.lucene.analysis.Analyzer)54 BytesRef (org.apache.lucene.util.BytesRef)51 LeafReader (org.apache.lucene.index.LeafReader)49 ArrayList (java.util.ArrayList)46 Field (org.apache.lucene.document.Field)45 LeafReaderContext (org.apache.lucene.index.LeafReaderContext)44 TermQuery (org.apache.lucene.search.TermQuery)42 IOException (java.io.IOException)37 TextField (org.apache.lucene.document.TextField)36