Search in sources :

Example 26 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testEmptyAtStart.

/**
 * Builds an infix suggester from an empty input, adds two suggestions afterwards, and
 * verifies key, highlighted key, weight and payload of the hits for several queries.
 */
public void testEmptyAtStart() throws Exception {
    Analyzer analyzer = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester infix = new AnalyzingInfixSuggester(newDirectory(), analyzer, analyzer, 3, false);

    // Build from nothing; both entries are added only after the build.
    infix.build(new InputArrayIterator(new Input[0]));
    infix.add(new BytesRef("a penny saved is a penny earned"), null, 10, new BytesRef("foobaz"));
    infix.add(new BytesRef("lend me your ear"), null, 8, new BytesRef("foobar"));
    infix.refresh();

    // "ear": both entries are expected, the weight-10 entry first.
    List<LookupResult> hits = infix.lookup(TestUtil.stringToCharSequence("ear", random()), 10, true, true);
    assertEquals(2, hits.size());
    LookupResult hit = hits.get(0);
    assertEquals("a penny saved is a penny earned", hit.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", hit.highlightKey);
    assertEquals(10, hit.value);
    assertEquals(new BytesRef("foobaz"), hit.payload);
    hit = hits.get(1);
    assertEquals("lend me your ear", hit.key);
    assertEquals("lend me your <b>ear</b>", hit.highlightKey);
    assertEquals(8, hit.value);
    assertEquals(new BytesRef("foobar"), hit.payload);

    // "ear " (trailing space): only the entry containing the whole word "ear" is expected.
    hits = infix.lookup(TestUtil.stringToCharSequence("ear ", random()), 10, true, true);
    assertEquals(1, hits.size());
    hit = hits.get(0);
    assertEquals("lend me your ear", hit.key);
    assertEquals("lend me your <b>ear</b>", hit.highlightKey);
    assertEquals(8, hit.value);
    assertEquals(new BytesRef("foobar"), hit.payload);

    // "pen": a single hit, highlighted at both occurrences.
    hits = infix.lookup(TestUtil.stringToCharSequence("pen", random()), 10, true, true);
    assertEquals(1, hits.size());
    hit = hits.get(0);
    assertEquals("a penny saved is a penny earned", hit.key);
    assertEquals("a <b>pen</b>ny saved is a <b>pen</b>ny earned", hit.highlightKey);
    assertEquals(10, hit.value);
    assertEquals(new BytesRef("foobaz"), hit.payload);

    // "p": same single hit with a one-character highlight.
    hits = infix.lookup(TestUtil.stringToCharSequence("p", random()), 10, true, true);
    assertEquals(1, hits.size());
    hit = hits.get(0);
    assertEquals("a penny saved is a penny earned", hit.key);
    assertEquals("a <b>p</b>enny saved is a <b>p</b>enny earned", hit.highlightKey);
    assertEquals(10, hit.value);
    assertEquals(new BytesRef("foobaz"), hit.payload);

    infix.close();
    analyzer.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Example 27 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project SearchServices by Alfresco.

the class AsyncBuildSuggestComponent method toSuggesterResult.

/**
 * Convert NamedList (suggester response) to {@link SuggesterResult}.
 *
 * <p>For every suggester name and every token below it, each entry found under the
 * {@code SuggesterResultLabels.SUGGESTIONS} property is converted into a
 * {@link LookupResult} built from its term, weight and payload.
 *
 * @param suggestionsMap suggester name mapped to its per-token suggestions; may be {@code null}
 * @return the converted result; an empty {@link SuggesterResult} when {@code suggestionsMap}
 *         is {@code null}
 */
private SuggesterResult toSuggesterResult(Map<String, SimpleOrderedMap<NamedList<Object>>> suggestionsMap) {
    SuggesterResult result = new SuggesterResult();
    if (suggestionsMap == null) {
        return result;
    }
    // for each suggester
    for (Map.Entry<String, SimpleOrderedMap<NamedList<Object>>> entry : suggestionsMap.entrySet()) {
        String suggesterName = entry.getKey();
        // for each token
        for (Map.Entry<String, NamedList<Object>> suggestions : entry.getValue()) {
            String tokenString = suggestions.getKey();
            List<LookupResult> lookupResults = new ArrayList<>();
            NamedList<Object> suggestion = suggestions.getValue();
            // for each property of the token's suggestion body
            for (int j = 0; j < suggestion.size(); j++) {
                String property = suggestion.getName(j);
                if (property.equals(SuggesterResultLabels.SUGGESTIONS)) {
                    @SuppressWarnings("unchecked") List<NamedList<Object>> suggestionEntries = (List<NamedList<Object>>) suggestion.getVal(j);
                    for (NamedList<Object> suggestionEntry : suggestionEntries) {
                        String term = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_TERM);
                        Long weight = (Long) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_WEIGHT);
                        String payload = (String) suggestionEntry.get(SuggesterResultLabels.SUGGESTION_PAYLOAD);
                        // An entry without a payload must not abort the conversion with a
                        // NullPointerException; toNamedList likewise treats payload as nullable.
                        BytesRef payloadRef = (payload != null) ? new BytesRef(payload) : null;
                        lookupResults.add(new LookupResult(new CharsRef(term), weight, payloadRef));
                    }
                }
                // NOTE(review): this runs once per property, not once per token; position kept
                // as in the original - confirm SuggesterResult.add tolerates repeated calls
                // with the same list before hoisting it out of the loop.
                result.add(suggesterName, tokenString, lookupResults);
            }
        }
    }
    return result;
}
Also used : NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) CharsRef(org.apache.lucene.util.CharsRef) SuggesterResult(org.apache.solr.spelling.suggest.SuggesterResult) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) List(java.util.List) ArrayList(java.util.ArrayList) LinkedList(java.util.LinkedList) NamedList(org.apache.solr.common.util.NamedList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) Map(java.util.Map) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) HashMap(java.util.HashMap) BytesRef(org.apache.lucene.util.BytesRef)

Example 28 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project SearchServices by Alfresco.

the class AsyncBuildSuggestComponent method toNamedList.

/**
 * Flattens a {@link SuggesterResult} into the NamedList structure used for constructing
 * responses.
 *
 * @param suggesterResult supplies the suggester names, their tokens and the lookup results
 * @param resultObj receives one entry per suggester name, keyed by that name
 */
private void toNamedList(SuggesterResult suggesterResult, Map<String, SimpleOrderedMap<NamedList<Object>>> resultObj) {
    for (String suggesterName : suggesterResult.getSuggesterNames()) {
        SimpleOrderedMap<NamedList<Object>> perToken = new SimpleOrderedMap<>();
        for (String token : suggesterResult.getTokens(suggesterName)) {
            List<LookupResult> hits = suggesterResult.getLookupResult(suggesterName, token);

            // The body lists the hit count first, then the individual suggestions.
            SimpleOrderedMap<Object> body = new SimpleOrderedMap<>();
            body.add(SuggesterResultLabels.SUGGESTION_NUM_FOUND, hits.size());

            List<SimpleOrderedMap<Object>> entries = new ArrayList<>();
            for (LookupResult hit : hits) {
                String term = hit.key.toString();
                long weight = hit.value;
                // A missing payload is reported as the empty string.
                String payloadText;
                if (hit.payload == null) {
                    payloadText = "";
                } else {
                    payloadText = hit.payload.utf8ToString();
                }
                SimpleOrderedMap<Object> entry = new SimpleOrderedMap<>();
                entry.add(SuggesterResultLabels.SUGGESTION_TERM, term);
                entry.add(SuggesterResultLabels.SUGGESTION_WEIGHT, weight);
                entry.add(SuggesterResultLabels.SUGGESTION_PAYLOAD, payloadText);
                entries.add(entry);
            }

            body.add(SuggesterResultLabels.SUGGESTIONS, entries);
            perToken.add(token, body);
        }
        resultObj.put(suggesterName, perToken);
    }
}
Also used : NamedList(org.apache.solr.common.util.NamedList) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) ArrayList(java.util.ArrayList) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap)

Example 29 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method testEmpty.

/**
 * Builds a {@link FuzzySuggester} from zero inputs and checks that a lookup yields no results.
 */
public void testEmpty() throws Exception {
    Analyzer keywordAnalyzer = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    Directory dir = getDirectory();
    FuzzySuggester fuzzy = new FuzzySuggester(dir, "fuzzy", keywordAnalyzer);

    Input[] noInputs = new Input[0];
    fuzzy.build(new InputArrayIterator(noInputs));

    List<LookupResult> hits = fuzzy.lookup("a", false, 20);
    assertTrue(hits.isEmpty());

    IOUtils.close(keywordAnalyzer, dir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 30 with LookupResult

use of org.apache.lucene.search.suggest.Lookup.LookupResult in project lucene-solr by apache.

the class FuzzySuggesterTest method testRandom.

public void testRandom() throws Exception {
    int numQueries = atLeast(100);
    final List<TermFreqPayload2> slowCompletor = new ArrayList<>();
    final TreeSet<String> allPrefixes = new TreeSet<>();
    final Set<String> seen = new HashSet<>();
    Input[] keys = new Input[numQueries];
    boolean preserveSep = random().nextBoolean();
    boolean unicodeAware = random().nextBoolean();
    final int numStopChars = random().nextInt(10);
    final boolean preserveHoles = random().nextBoolean();
    if (VERBOSE) {
        System.out.println("TEST: " + numQueries + " words; preserveSep=" + preserveSep + " ; unicodeAware=" + unicodeAware + " numStopChars=" + numStopChars + " preserveHoles=" + preserveHoles);
    }
    for (int i = 0; i < numQueries; i++) {
        int numTokens = TestUtil.nextInt(random(), 1, 4);
        String key;
        String analyzedKey;
        while (true) {
            key = "";
            analyzedKey = "";
            boolean lastRemoved = false;
            for (int token = 0; token < numTokens; token++) {
                String s;
                while (true) {
                    // TODO: would be nice to fix this slowCompletor/comparator to
                    // use full range, but we might lose some coverage too...
                    s = TestUtil.randomSimpleString(random());
                    if (s.length() > 0) {
                        if (token > 0) {
                            key += " ";
                        }
                        if (preserveSep && analyzedKey.length() > 0 && (unicodeAware ? analyzedKey.codePointAt(analyzedKey.codePointCount(0, analyzedKey.length()) - 1) != ' ' : analyzedKey.charAt(analyzedKey.length() - 1) != ' ')) {
                            analyzedKey += " ";
                        }
                        key += s;
                        if (s.length() == 1 && isStopChar(s.charAt(0), numStopChars)) {
                            if (preserveSep && preserveHoles) {
                                analyzedKey += '';
                            }
                            lastRemoved = true;
                        } else {
                            analyzedKey += s;
                            lastRemoved = false;
                        }
                        break;
                    }
                }
            }
            analyzedKey = analyzedKey.replaceAll("(^| )$", "");
            if (preserveSep && lastRemoved) {
                analyzedKey += " ";
            }
            // Don't add same surface form more than once:
            if (!seen.contains(key)) {
                seen.add(key);
                break;
            }
        }
        for (int j = 1; j < key.length(); j++) {
            allPrefixes.add(key.substring(0, j));
        }
        // we can probably do Integer.MAX_VALUE here, but why worry.
        int weight = random().nextInt(1 << 24);
        keys[i] = new Input(key, weight);
        slowCompletor.add(new TermFreqPayload2(key, analyzedKey, weight));
    }
    if (VERBOSE) {
        // Don't just sort original list, to avoid VERBOSE
        // altering the test:
        List<TermFreqPayload2> sorted = new ArrayList<>(slowCompletor);
        Collections.sort(sorted);
        for (TermFreqPayload2 ent : sorted) {
            System.out.println("  surface='" + ent.surfaceForm + " analyzed='" + ent.analyzedForm + "' weight=" + ent.weight);
        }
    }
    Analyzer a = new MockTokenEatingAnalyzer(numStopChars, preserveHoles);
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, preserveSep ? AnalyzingSuggester.PRESERVE_SEP : 0, 256, -1, true, 1, false, 1, 3, unicodeAware);
    suggester.build(new InputArrayIterator(keys));
    for (String prefix : allPrefixes) {
        if (VERBOSE) {
            System.out.println("\nTEST: prefix=" + prefix);
        }
        final int topN = TestUtil.nextInt(random(), 1, 10);
        List<LookupResult> r = suggester.lookup(TestUtil.stringToCharSequence(prefix, random()), false, topN);
        // 2. go thru whole set to find suggestions:
        List<LookupResult> matches = new ArrayList<>();
        // "Analyze" the key:
        String[] tokens = prefix.split(" ");
        StringBuilder builder = new StringBuilder();
        boolean lastRemoved = false;
        for (int i = 0; i < tokens.length; i++) {
            String token = tokens[i];
            if (preserveSep && builder.length() > 0 && !builder.toString().endsWith(" ")) {
                builder.append(' ');
            }
            if (token.length() == 1 && isStopChar(token.charAt(0), numStopChars)) {
                if (preserveSep && preserveHoles) {
                    builder.append("");
                }
                lastRemoved = true;
            } else {
                builder.append(token);
                lastRemoved = false;
            }
        }
        String analyzedKey = builder.toString();
        // issue open for this):
        while (true) {
            String s = analyzedKey.replaceAll("(^| )$", "");
            s = s.replaceAll("\\s+$", "");
            if (s.equals(analyzedKey)) {
                break;
            }
            analyzedKey = s;
        }
        if (analyzedKey.length() == 0) {
            // string!  You get no results, not all results...
            continue;
        }
        if (preserveSep && (prefix.endsWith(" ") || lastRemoved)) {
            analyzedKey += " ";
        }
        if (VERBOSE) {
            System.out.println("  analyzed: " + analyzedKey);
        }
        TokenStreamToAutomaton tokenStreamToAutomaton = suggester.getTokenStreamToAutomaton();
        // NOTE: not great that we ask the suggester to give
        // us the "answer key" (ie maybe we have a bug in
        // suggester.toLevA ...) ... but testRandom2() fixes
        // this:
        Automaton automaton = suggester.convertAutomaton(suggester.toLevenshteinAutomata(suggester.toLookupAutomaton(analyzedKey)));
        assertTrue(automaton.isDeterministic());
        // TODO: could be faster... but it's slowCompletor for a reason
        BytesRefBuilder spare = new BytesRefBuilder();
        for (TermFreqPayload2 e : slowCompletor) {
            spare.copyChars(e.analyzedForm);
            FiniteStringsIterator finiteStrings = new FiniteStringsIterator(suggester.toAutomaton(spare.get(), tokenStreamToAutomaton));
            for (IntsRef string; (string = finiteStrings.next()) != null; ) {
                int p = 0;
                BytesRef ref = Util.toBytesRef(string, spare);
                boolean added = false;
                for (int i = ref.offset; i < ref.length; i++) {
                    int q = automaton.step(p, ref.bytes[i] & 0xff);
                    if (q == -1) {
                        break;
                    } else if (automaton.isAccept(q)) {
                        matches.add(new LookupResult(e.surfaceForm, e.weight));
                        added = true;
                        break;
                    }
                    p = q;
                }
                if (!added && automaton.isAccept(p)) {
                    matches.add(new LookupResult(e.surfaceForm, e.weight));
                }
            }
        }
        assertTrue(numStopChars > 0 || matches.size() > 0);
        if (matches.size() > 1) {
            Collections.sort(matches, new Comparator<LookupResult>() {

                @Override
                public int compare(LookupResult left, LookupResult right) {
                    int cmp = Float.compare(right.value, left.value);
                    if (cmp == 0) {
                        return left.compareTo(right);
                    } else {
                        return cmp;
                    }
                }
            });
        }
        if (matches.size() > topN) {
            matches = matches.subList(0, topN);
        }
        if (VERBOSE) {
            System.out.println("  expected:");
            for (LookupResult lr : matches) {
                System.out.println("    key=" + lr.key + " weight=" + lr.value);
            }
            System.out.println("  actual:");
            for (LookupResult lr : r) {
                System.out.println("    key=" + lr.key + " weight=" + lr.value);
            }
        }
        assertEquals(prefix + "  " + topN, matches.size(), r.size());
        for (int hit = 0; hit < r.size(); hit++) {
            //System.out.println("  check hit " + hit);
            assertEquals(prefix + "  " + topN, matches.get(hit).key.toString(), r.get(hit).key.toString());
            assertEquals(matches.get(hit).value, r.get(hit).value, 0f);
        }
    }
    IOUtils.close(a, tempDir);
}
Also used : FiniteStringsIterator(org.apache.lucene.util.automaton.FiniteStringsIterator) ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Input(org.apache.lucene.search.suggest.Input) TreeSet(java.util.TreeSet) IntsRef(org.apache.lucene.util.IntsRef) BytesRef(org.apache.lucene.util.BytesRef) HashSet(java.util.HashSet) Directory(org.apache.lucene.store.Directory) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) TokenStreamToAutomaton(org.apache.lucene.analysis.TokenStreamToAutomaton) Automaton(org.apache.lucene.util.automaton.Automaton) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) TokenStreamToAutomaton(org.apache.lucene.analysis.TokenStreamToAutomaton)

Aggregations

LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult): 65, Input (org.apache.lucene.search.suggest.Input): 48, InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 48, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 45, Analyzer (org.apache.lucene.analysis.Analyzer): 43, Directory (org.apache.lucene.store.Directory): 36, BytesRef (org.apache.lucene.util.BytesRef): 22, ArrayList (java.util.ArrayList): 14, Path (java.nio.file.Path): 11, HashSet (java.util.HashSet): 9, MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 7, Reader (java.io.Reader): 6, CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 6, Tokenizer (org.apache.lucene.analysis.Tokenizer): 6, HashMap (java.util.HashMap): 5, Token (org.apache.lucene.analysis.Token): 5, TokenStream (org.apache.lucene.analysis.TokenStream): 5, LinkedList (java.util.LinkedList): 4, CharArraySet (org.apache.lucene.analysis.CharArraySet): 4, SuggesterResult (org.apache.solr.spelling.suggest.SuggesterResult): 4