Search in sources :

Example 1 with SuggestScoreDoc

use of org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc in project lucene-solr by apache.

the class TopSuggestDocsCollector method get.

/**
 * Builds the {@link org.apache.lucene.search.suggest.document.TopSuggestDocs} for the hits
 * gathered so far.
 *
 * <p>When {@code seenSurfaceForms} is non-null, the queued hits are appended to
 * {@code pendingResults}, ordered by descending score (ties broken by ascending doc id) and
 * reduced to the first hit per distinct key, stopping as soon as <code>num</code> hits have
 * been kept. Otherwise the priority queue's results are used unchanged.
 *
 * @return the resulting suggestions, or {@code TopSuggestDocs.EMPTY} when there are no hits
 * @throws IOException propagated from the calls made here, should any of them raise it
 */
public TopSuggestDocs get() throws IOException {
    final SuggestScoreDoc[] suggestScoreDocs;
    if (seenSurfaceForms == null) {
        // No duplicate tracking in effect: hand back the queue contents directly.
        suggestScoreDocs = priorityQueue.getResults();
    } else {
        // NOTE: getResults() also clears the priorityQueue:
        SuggestScoreDoc[] queued = priorityQueue.getResults();
        for (int i = 0; i < queued.length; i++) {
            pendingResults.add(queued[i]);
        }

        // Hits were already de-duplicated efficiently inside each segment (by truncating the
        // FST top paths search), but the same key can still show up in several segments, so
        // start the seen-set afresh and de-duplicate the merged list:
        seenSurfaceForms.clear();

        // TODO: a priority queue would bring this down to O(N * log(num)) from O(N * log(N)),
        // where N = O(num * numSegments); typically both numSegments and num are smallish, so
        // this won't matter much in practice:
        Comparator<SuggestScoreDoc> bestFirst = new Comparator<SuggestScoreDoc>() {

            @Override
            public int compare(SuggestScoreDoc a, SuggestScoreDoc b) {
                // higher score first ...
                int byScore = Float.compare(b.score, a.score);
                // ... and on equal scores the lower docID wins:
                return byScore != 0 ? byScore : Integer.compare(a.doc, b.doc);
            }
        };
        Collections.sort(pendingResults, bestFirst);

        List<SuggestScoreDoc> unique = new ArrayList<>();
        for (SuggestScoreDoc candidate : pendingResults) {
            if (seenSurfaceForms.contains(candidate.key)) {
                // A better-ranked hit with this key was already kept.
                continue;
            }
            seenSurfaceForms.add(candidate.key);
            unique.add(candidate);
            if (unique.size() == num) {
                break;
            }
        }
        suggestScoreDocs = unique.toArray(new SuggestScoreDoc[0]);
    }

    if (suggestScoreDocs.length == 0) {
        return TopSuggestDocs.EMPTY;
    }
    // The first entry's score is passed as the third constructor argument.
    return new TopSuggestDocs(suggestScoreDocs.length, suggestScoreDocs, suggestScoreDocs[0].score);
}
Also used : ArrayList(java.util.ArrayList) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc)

Example 2 with SuggestScoreDoc

use of org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc in project lucene-solr by apache.

the class TestSuggestField method testReturnedDocID.

/**
 * Indexes documents whose suggestion key ends in the same number that is stored in their
 * {@code int_field}, then checks that the doc id reported with every suggestion resolves to
 * the document holding that number.
 */
@Test
public void testReturnedDocID() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    int docTotal = Math.min(1000, atLeast(10));
    for (int ord = 0; ord < docTotal; ord++) {
        Document toIndex = new Document();
        // Key suffix and stored value are the same number; the weight is docTotal for every doc.
        toIndex.add(new SuggestField("suggest_field", "abc_" + ord, docTotal));
        toIndex.add(new StoredField("int_field", ord));
        writer.addDocument(toIndex);
        // Commit at random points between documents.
        if (random().nextBoolean()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);
    Term prefixTerm = new Term("suggest_field", "abc_");
    PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, prefixTerm);
    TopSuggestDocs suggest = searcher.suggest(query, docTotal, false);
    assertEquals(docTotal, suggest.totalHits);

    for (SuggestScoreDoc hit : suggest.scoreLookupDocs()) {
        String surface = hit.key.toString();
        assertTrue(surface.startsWith("abc_"));
        // Everything after the 4-character "abc_" prefix is the number the doc was indexed with.
        int suffixValue = Integer.parseInt(surface.substring(4));
        int storedValue = reader.document(hit.doc).getField("int_field").numericValue().intValue();
        assertEquals(storedValue, suffixValue);
    }

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) StoredField(org.apache.lucene.document.StoredField) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 3 with SuggestScoreDoc

use of org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc in project lucene-solr by apache.

the class TestSuggestField method testRandom.

/**
 * Randomized check of prefix suggestions: indexes random keys with random weights, then for
 * many random prefixes compares the collector's output (with and without de-duplication)
 * against a brute-force expectation computed over the in-memory entries.
 */
public void testRandom() throws Exception {
    int numDigits = TestUtil.nextInt(random(), 1, 6);
    Set<String> keys = new HashSet<>();
    int requestedKeys = TestUtil.nextInt(random(), 1, 20);
    // With numDigits == 1 at most 9 distinct keys are requested
    // NOTE(review): presumably so the fill loop below can terminate - confirm against randomSimpleString.
    int keyCount = numDigits == 1 ? Math.min(9, requestedKeys) : requestedKeys;
    while (keys.size() < keyCount) {
        keys.add(randomSimpleString(numDigits, 10));
    }
    List<String> keysList = new ArrayList<>(keys);

    Analyzer analyzer = new MockAnalyzer(random());
    IndexWriterConfig iwc = iwcWithSuggestField(analyzer, "suggest_field");
    // we rely on docID order:
    iwc.setMergePolicy(newLogMergePolicy());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwc);

    int docCount = TestUtil.nextInt(random(), 1, 200);
    Entry[] docs = new Entry[docCount];
    for (int docOrd = 0; docOrd < docCount; docOrd++) {
        int weight = random().nextInt(40);
        String key = keysList.get(random().nextInt(keyCount));
        docs[docOrd] = new Entry(key, null, weight, docOrd);
        Document toIndex = new Document();
        toIndex.add(new SuggestField("suggest_field", key, weight));
        writer.addDocument(toIndex);
        if (usually()) {
            writer.commit();
        }
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);

    // Best entries first: higher value wins, equal values fall back to the smaller id.
    Comparator<Entry> bestFirst = new Comparator<Entry>() {

        @Override
        public int compare(Entry a, Entry b) {
            int byValue = Float.compare(b.value, a.value);
            return byValue != 0 ? byValue : Integer.compare(a.id, b.id);
        }
    };

    int iters = atLeast(200);
    for (int iter = 0; iter < iters; iter++) {
        String prefix = randomSimpleString(numDigits, 2);
        if (VERBOSE) {
            System.out.println("\nTEST: prefix=" + prefix);
        }

        // slow but hopefully correct suggester:
        List<Entry> expected = new ArrayList<>();
        for (int d = 0; d < docs.length; d++) {
            if (docs[d].output.startsWith(prefix)) {
                expected.add(docs[d]);
            }
        }
        Collections.sort(expected, bestFirst);

        boolean dedup = random().nextBoolean();
        if (dedup) {
            // Keep only the best-ranked entry for each distinct output.
            Set<String> seen = new HashSet<>();
            List<Entry> firstPerOutput = new ArrayList<>();
            for (Entry candidate : expected) {
                if (seen.add(candidate.output)) {
                    firstPerOutput.add(candidate);
                }
            }
            expected = firstPerOutput;
        }

        // TODO: re-enable this, except something is buggy about tie breaks at the topN threshold now:
        //int topN = TestUtil.nextInt(random(), 1, docCount+10);
        int topN = docCount;

        if (VERBOSE) {
            String heading = dedup ? "  expected (dedup'd) topN=" : "  expected topN=";
            System.out.println(heading + topN + ":");
            int rank = 0;
            for (Entry entry : expected) {
                String label = rank >= topN ? "    leftover: " : "    ";
                System.out.println(label + rank + ": " + entry);
                rank++;
            }
        }
        expected = expected.subList(0, Math.min(topN, expected.size()));

        PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
        TopSuggestDocsCollector collector = new TopSuggestDocsCollector(topN, dedup);
        searcher.suggest(query, collector);
        TopSuggestDocs actual = collector.get();

        if (VERBOSE) {
            System.out.println("  actual:");
            SuggestScoreDoc[] returned = (SuggestScoreDoc[]) actual.scoreDocs;
            for (int pos = 0; pos < returned.length; pos++) {
                System.out.println("    " + pos + ": " + returned[pos]);
            }
        }

        assertSuggestions(actual, expected.toArray(new Entry[expected.size()]));
    }

    reader.close();
    writer.close();
}
Also used : ArrayList(java.util.ArrayList) CopyOnWriteArrayList(java.util.concurrent.CopyOnWriteArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) HashSet(java.util.HashSet) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) IntPoint(org.apache.lucene.document.IntPoint) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Example 4 with SuggestScoreDoc

use of org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc in project lucene-solr by apache.

the class TestSuggestField method testScoring.

/**
 * Indexes suggestions with random weights under three fixed prefixes and verifies, for each
 * prefix, that the returned suggestions are ordered by non-increasing score and that every
 * suggestion's score equals the weight it was indexed with. Every indexed suggestion must be
 * returned exactly once across the three prefix queries.
 */
@Test
public void testScoring() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "suggest_field"));
    int num = Math.min(1000, atLeast(100));
    String[] prefixes = { "abc", "bac", "cab" };
    // suggestion text -> weight it was indexed with; entries are removed as they are matched
    Map<String, Integer> mappings = new HashMap<>();
    for (int i = 0; i < num; i++) {
        Document document = new Document();
        // The "_<i>" suffix keeps every suggestion text unique.
        String suggest = prefixes[i % 3] + TestUtil.randomSimpleString(random(), 10) + "_" + i;
        int weight = random().nextInt(Integer.MAX_VALUE);
        document.add(new SuggestField("suggest_field", suggest, weight));
        mappings.put(suggest, weight);
        iw.addDocument(document);
        if (usually()) {
            iw.commit();
        }
    }
    DirectoryReader reader = iw.getReader();
    SuggestIndexSearcher indexSearcher = new SuggestIndexSearcher(reader);
    for (String prefix : prefixes) {
        PrefixCompletionQuery query = new PrefixCompletionQuery(analyzer, new Term("suggest_field", prefix));
        TopSuggestDocs suggest = indexSearcher.suggest(query, num, false);
        assertTrue(suggest.totalHits > 0);
        // -1 means "no previous hit yet"; indexed weights are never negative.
        float previousScore = -1;
        for (SuggestScoreDoc scoreDoc : suggest.scoreLookupDocs()) {
            if (previousScore != -1) {
                assertTrue(previousScore >= scoreDoc.score);
            }
            previousScore = scoreDoc.score;
            String key = scoreDoc.key.toString();
            // Remove first and null-check before unboxing, so that an unknown or repeated
            // suggestion fails with a clear assertion rather than a NullPointerException.
            Integer indexedWeight = mappings.remove(key);
            assertNotNull("unexpected or duplicate suggestion: " + key, indexedWeight);
            assertThat(indexedWeight.floatValue(), equalTo(scoreDoc.score));
        }
    }
    // Anything left over was indexed but never returned.
    assertThat(mappings.size(), equalTo(0));
    reader.close();
    iw.close();
}
Also used : HashMap(java.util.HashMap) DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Example 5 with SuggestScoreDoc

use of org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc in project lucene-solr by apache.

the class TestSuggestField method testMultipleSuggestFieldsPerDoc.

/**
 * Indexes two documents that each carry a suggestion in two different suggest fields, queries
 * both fields with the prefix "ap", and checks the expected suggestions as well as that the
 * hits at the same position in both result lists report the same doc id.
 */
@Test
public void testMultipleSuggestFieldsPerDoc() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random());
    RandomIndexWriter writer = new RandomIndexWriter(random(), dir, iwcWithSuggestField(analyzer, "sug_field_1", "sug_field_2"));

    Document first = new Document();
    first.add(new SuggestField("sug_field_1", "apple", 4));
    first.add(new SuggestField("sug_field_2", "april", 3));
    writer.addDocument(first);

    Document second = new Document();
    second.add(new SuggestField("sug_field_1", "aples", 3));
    second.add(new SuggestField("sug_field_2", "apartment", 2));
    writer.addDocument(second);

    if (rarely()) {
        writer.commit();
    }

    DirectoryReader reader = writer.getReader();
    SuggestIndexSearcher searcher = new SuggestIndexSearcher(reader);

    TopSuggestDocs fromField1 = searcher.suggest(new PrefixCompletionQuery(analyzer, new Term("sug_field_1", "ap")), 4, false);
    assertSuggestions(fromField1, new Entry("apple", 4), new Entry("aples", 3));

    TopSuggestDocs fromField2 = searcher.suggest(new PrefixCompletionQuery(analyzer, new Term("sug_field_2", "ap")), 4, false);
    assertSuggestions(fromField2, new Entry("april", 3), new Entry("apartment", 2));

    // check that the doc ids are consistent
    ScoreDoc[] hits1 = fromField1.scoreDocs;
    ScoreDoc[] hits2 = fromField2.scoreDocs;
    for (int pos = 0; pos < hits1.length; pos++) {
        assertThat(hits1[pos].doc, equalTo(hits2[pos].doc));
    }

    reader.close();
    writer.close();
}
Also used : DirectoryReader(org.apache.lucene.index.DirectoryReader) Term(org.apache.lucene.index.Term) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Document(org.apache.lucene.document.Document) IntPoint(org.apache.lucene.document.IntPoint) ScoreDoc(org.apache.lucene.search.ScoreDoc) SuggestScoreDoc(org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) RandomIndexWriter(org.apache.lucene.index.RandomIndexWriter) Test(org.junit.Test)

Aggregations

SuggestScoreDoc (org.apache.lucene.search.suggest.document.TopSuggestDocs.SuggestScoreDoc)6 IntPoint (org.apache.lucene.document.IntPoint)5 Analyzer (org.apache.lucene.analysis.Analyzer)4 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)4 Document (org.apache.lucene.document.Document)4 DirectoryReader (org.apache.lucene.index.DirectoryReader)4 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)4 Term (org.apache.lucene.index.Term)4 Test (org.junit.Test)3 ArrayList (java.util.ArrayList)2 HashMap (java.util.HashMap)1 HashSet (java.util.HashSet)1 CopyOnWriteArrayList (java.util.concurrent.CopyOnWriteArrayList)1 StoredField (org.apache.lucene.document.StoredField)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 ScoreDoc (org.apache.lucene.search.ScoreDoc)1