Search in sources :

Example 71 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testCommitAfterBuild.

/**
 * Hands a "build from the shared inputs, then commit" action to
 * {@code performOperationWithAllOptionCombinations}.
 */
public void testCommitAfterBuild() throws Exception {
    performOperationWithAllOptionCombinations(infixSuggester -> {
        // Index the shared inputs first, then commit what was just built.
        InputArrayIterator inputs = new InputArrayIterator(sharedInputs);
        infixSuggester.build(inputs);
        infixSuggester.commit();
    });
}
Also used : InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator)

Example 72 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testBinaryContext.

/**
 * Verifies that lookups filtered by a binary ({@code byte[]}) context return the
 * expected suggestions, weights, payloads and contexts. The checks run twice:
 * once right after building the index and once after the suggester has been
 * closed and reopened on the same directory.
 */
public void testBinaryContext() throws Exception {
    byte[] context1 = new byte[4];
    byte[] context2 = new byte[5];
    byte[] context3 = new byte[1];
    context3[0] = (byte) 0xff;
    Input[] keys = new Input[] {
        new Input("lend me your ear", 8, new BytesRef("foobar"), asSet(context1, context2)),
        new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet(context1, context3))
    };
    Path tempDir = createTempDir("analyzingInfixContext");
    for (int iter = 0; iter < 2; iter++) {
        // try-with-resources releases the suggester first and the analyzer second
        // (reverse declaration order), even when an assertion below fails.
        try (Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
             AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false)) {
            if (iter == 0) {
                suggester.build(new InputArrayIterator(keys));
            }
            // On the second pass nothing is built: test again, after close/reopen.

            // Both have context1:
            List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet(context1), 10, true, true);
            assertEquals(2, results.size());

            LookupResult result = results.get(0);
            assertEquals("a penny saved is a penny earned", result.key);
            assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
            assertEquals(10, result.value);
            assertEquals(new BytesRef("foobaz"), result.payload);
            assertNotNull(result.contexts);
            assertEquals(2, result.contexts.size());
            assertTrue(result.contexts.contains(new BytesRef(context1)));
            assertTrue(result.contexts.contains(new BytesRef(context3)));

            result = results.get(1);
            assertEquals("lend me your ear", result.key);
            assertEquals("lend me your <b>ear</b>", result.highlightKey);
            assertEquals(8, result.value);
            assertEquals(new BytesRef("foobar"), result.payload);
            assertNotNull(result.contexts);
            assertEquals(2, result.contexts.size());
            assertTrue(result.contexts.contains(new BytesRef(context1)));
            assertTrue(result.contexts.contains(new BytesRef(context2)));
        }
    }
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) BytesRef(org.apache.lucene.util.BytesRef)

Example 73 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testExactFirst.

/**
 * Builds a suggester with EXACT_FIRST | PRESERVE_SEP and looks up "x y" for
 * topN = 1..5, asserting the result count and the key/weight at each rank.
 */
public void testExactFirst() throws Exception {
    Analyzer a = getUnusualAnalyzer();
    int options = AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP;
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, options, 256, -1, true);

    Input[] inputs = new Input[] { new Input("x y", 1), new Input("x y z", 3), new Input("x", 2), new Input("z z z", 20) };
    suggester.build(new InputArrayIterator(inputs));

    // Expected ranking: "x y" comes first despite its low weight; the remaining
    // entries are asserted in descending weight order.
    String[] expectedKeys = { "x y", "z z z", "x y z", "x" };
    long[] expectedWeights = { 1, 20, 3, 2 };

    for (int topN = 1; topN < 6; topN++) {
        List<LookupResult> results = suggester.lookup("x y", false, topN);
        int expectedCount = Math.min(topN, 4);
        assertEquals(expectedCount, results.size());
        for (int rank = 0; rank < expectedCount; rank++) {
            LookupResult hit = results.get(rank);
            assertEquals(expectedKeys[rank], hit.key);
            assertEquals(expectedWeights[rank], hit.value);
        }
    }
    IOUtils.close(a, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 74 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testNonExactFirst.

/**
 * Builds a suggester with only PRESERVE_SEP (no EXACT_FIRST) and looks up "p"
 * for topN = 1..5, asserting the result count and the key/weight at each rank.
 */
public void testNonExactFirst() throws Exception {
    Analyzer a = getUnusualAnalyzer();
    Directory tempDir = getDirectory();
    AnalyzingSuggester suggester = new AnalyzingSuggester(tempDir, "suggest", a, a, AnalyzingSuggester.PRESERVE_SEP, 256, -1, true);

    Input[] inputs = new Input[] { new Input("x y", 1), new Input("x y z", 3), new Input("x", 2), new Input("z z z", 20) };
    suggester.build(new InputArrayIterator(inputs));

    // Expected ranking: all four entries in descending weight order.
    String[] expectedKeys = { "z z z", "x y z", "x", "x y" };
    long[] expectedWeights = { 20, 3, 2, 1 };

    for (int topN = 1; topN < 6; topN++) {
        List<LookupResult> results = suggester.lookup("p", false, topN);
        int expectedCount = Math.min(topN, 4);
        assertEquals(expectedCount, results.size());
        for (int rank = 0; rank < expectedCount; rank++) {
            LookupResult hit = results.get(rank);
            assertEquals(expectedKeys[rank], hit.key);
            assertEquals(expectedWeights[rank], hit.value);
        }
    }
    IOUtils.close(a, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 75 with InputArrayIterator

use of org.apache.lucene.search.suggest.InputArrayIterator in project lucene-solr by apache.

the class AnalyzingSuggesterTest method testRandomRealisticKeys.

/**
 * Feeds titles drawn from {@link LineFileDocs} with random weights into an
 * {@link AnalyzingSuggester}, then looks every title up again and checks that each
 * returned suggestion carries the highest weight recorded for its key and, when
 * payloads were indexed, a payload equal to that weight rendered as a string.
 *
 * @throws IOException if reading documents, building, looking up or closing fails
 */
public void testRandomRealisticKeys() throws IOException {
    LineFileDocs lineFile = new LineFileDocs(random());
    // Highest weight seen per title; the random titles may repeat.
    Map<String, Long> mapping = new HashMap<>();
    List<Input> keys = new ArrayList<>();
    // this might bring up duplicates
    int howMany = atLeast(100);
    for (int i = 0; i < howMany; i++) {
        Document nextDoc = lineFile.nextDoc();
        String title = nextDoc.getField("title").stringValue();
        int randomWeight = random().nextInt(100);
        keys.add(new Input(title, randomWeight));
        // Keep the maximum weight for a title that occurs more than once.
        mapping.merge(title, Long.valueOf(randomWeight), Long::max);
    }
    Analyzer indexAnalyzer = new MockAnalyzer(random());
    Analyzer queryAnalyzer = new MockAnalyzer(random());
    Directory tempDir = getDirectory();
    AnalyzingSuggester analyzingSuggester = new AnalyzingSuggester(tempDir, "suggest", indexAnalyzer, queryAnalyzer, AnalyzingSuggester.EXACT_FIRST | AnalyzingSuggester.PRESERVE_SEP, 256, -1, random().nextBoolean());
    boolean doPayloads = random().nextBoolean();
    if (doPayloads) {
        // Each payload is the entry's own weight rendered as a decimal string.
        List<Input> keysAndPayloads = new ArrayList<>();
        for (Input termFreq : keys) {
            keysAndPayloads.add(new Input(termFreq.term, termFreq.v, new BytesRef(Long.toString(termFreq.v))));
        }
        analyzingSuggester.build(new InputArrayIterator(keysAndPayloads));
    } else {
        analyzingSuggester.build(new InputArrayIterator(keys));
    }
    for (Input termFreq : keys) {
        List<LookupResult> lookup = analyzingSuggester.lookup(termFreq.term.utf8ToString(), false, keys.size());
        for (LookupResult lookupResult : lookup) {
            // The map is keyed by String; convert explicitly so the lookup does not
            // depend on the runtime CharSequence implementation of the result key.
            assertEquals(mapping.get(lookupResult.key.toString()), Long.valueOf(lookupResult.value));
            if (doPayloads) {
                // Expected value first: the payload must spell out the returned weight.
                assertEquals(Long.toString(lookupResult.value), lookupResult.payload.utf8ToString());
            } else {
                assertNull(lookupResult.payload);
            }
        }
    }
    IOUtils.close(lineFile, indexAnalyzer, queryAnalyzer, tempDir);
}
Also used : HashMap(java.util.HashMap) ArrayList(java.util.ArrayList) Document(org.apache.lucene.document.Document) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Input(org.apache.lucene.search.suggest.Input) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) BytesRef(org.apache.lucene.util.BytesRef) LineFileDocs(org.apache.lucene.util.LineFileDocs) Directory(org.apache.lucene.store.Directory)

Aggregations

InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)76 Input (org.apache.lucene.search.suggest.Input)71 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)67 Analyzer (org.apache.lucene.analysis.Analyzer)65 LookupResult (org.apache.lucene.search.suggest.Lookup.LookupResult)48 Directory (org.apache.lucene.store.Directory)43 BytesRef (org.apache.lucene.util.BytesRef)26 Path (java.nio.file.Path)17 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)11 Tokenizer (org.apache.lucene.analysis.Tokenizer)10 Reader (java.io.Reader)8 ArrayList (java.util.ArrayList)8 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)8 StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer)7 TokenStream (org.apache.lucene.analysis.TokenStream)6 HashSet (java.util.HashSet)5 CharArraySet (org.apache.lucene.analysis.CharArraySet)5 Token (org.apache.lucene.analysis.Token)5 InputStream (java.io.InputStream)4 OutputStream (java.io.OutputStream)4