Search in sources :

Example 71 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

the class TestFreeTextSuggester method testIllegalByteDuringQuery.

public void testIllegalByteDuringQuery() throws Exception {
    // Default separator is INFORMATION SEPARATOR TWO
    // (0x1e), so no input token is allowed to contain it
    Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("foo bar baz", 50));
    Analyzer analyzer = new MockAnalyzer(random());
    FreeTextSuggester sug = new FreeTextSuggester(analyzer);
    sug.build(new InputArrayIterator(keys));
    expectThrows(IllegalArgumentException.class, () -> {
        sug.lookup("foob", 10);
    });
    analyzer.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 72 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

the class FuzzySuggesterTest method testRandom2.

public void testRandom2() throws Throwable {
    final int NUM = atLeast(200);
    final List<Input> answers = new ArrayList<>();
    final Set<String> seen = new HashSet<>();
    for (int i = 0; i < NUM; i++) {
        final String s = randomSimpleString(8);
        if (!seen.contains(s)) {
            answers.add(new Input(s, random().nextInt(1000)));
            seen.add(s);
        }
    }
    Collections.sort(answers, new Comparator<Input>() {

        @Override
        public int compare(Input a, Input b) {
            return a.term.compareTo(b.term);
        }
    });
    if (VERBOSE) {
        System.out.println("\nTEST: targets");
        for (Input tf : answers) {
            System.out.println("  " + tf.term.utf8ToString() + " freq=" + tf.v);
        }
    }
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.KEYWORD, false);
    int maxEdits = random().nextBoolean() ? 1 : 2;
    int prefixLen = random().nextInt(4);
    boolean transpositions = random().nextBoolean();
    // TODO: test graph analyzers
    // TODO: test exactFirst / preserveSep permutations
    Directory tempDir = getDirectory();
    FuzzySuggester suggest = new FuzzySuggester(tempDir, "fuzzy", a, a, 0, 256, -1, true, maxEdits, transpositions, prefixLen, prefixLen, false);
    if (VERBOSE) {
        System.out.println("TEST: maxEdits=" + maxEdits + " prefixLen=" + prefixLen + " transpositions=" + transpositions + " num=" + NUM);
    }
    Collections.shuffle(answers, random());
    suggest.build(new InputArrayIterator(answers.toArray(new Input[answers.size()])));
    final int ITERS = atLeast(100);
    for (int iter = 0; iter < ITERS; iter++) {
        final String frag = randomSimpleString(6);
        if (VERBOSE) {
            System.out.println("\nTEST: iter frag=" + frag);
        }
        final List<LookupResult> expected = slowFuzzyMatch(prefixLen, maxEdits, transpositions, answers, frag);
        if (VERBOSE) {
            System.out.println("  expected: " + expected.size());
            for (LookupResult c : expected) {
                System.out.println("    " + c);
            }
        }
        final List<LookupResult> actual = suggest.lookup(frag, false, NUM);
        if (VERBOSE) {
            System.out.println("  actual: " + actual.size());
            for (LookupResult c : actual) {
                System.out.println("    " + c);
            }
        }
        Collections.sort(actual, new CompareByCostThenAlpha());
        final int limit = Math.min(expected.size(), actual.size());
        for (int ans = 0; ans < limit; ans++) {
            final LookupResult c0 = expected.get(ans);
            final LookupResult c1 = actual.get(ans);
            assertEquals("expected " + c0.key + " but got " + c1.key, 0, CHARSEQUENCE_COMPARATOR.compare(c0.key, c1.key));
            assertEquals(c0.value, c1.value);
        }
        assertEquals(expected.size(), actual.size());
    }
    IOUtils.close(a, tempDir);
}
Also used : ArrayList(java.util.ArrayList) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) HashSet(java.util.HashSet) Directory(org.apache.lucene.store.Directory)

Example 73 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

the class FuzzySuggesterTest method testEditSeps.

public void testEditSeps() throws Exception {
    Analyzer a = new MockAnalyzer(random());
    Directory tempDir = getDirectory();
    FuzzySuggester suggester = new FuzzySuggester(tempDir, "fuzzy", a, a, FuzzySuggester.PRESERVE_SEP, 2, -1, true, 2, true, 1, 3, false);
    List<Input> keys = Arrays.asList(new Input[] { new Input("foo bar", 40), new Input("foo bar baz", 50), new Input("barbaz", 60), new Input("barbazfoo", 10) });
    Collections.shuffle(keys, random());
    suggester.build(new InputArrayIterator(keys));
    assertEquals("[foo bar baz/50, foo bar/40]", suggester.lookup("foobar", false, 5).toString());
    assertEquals("[foo bar baz/50]", suggester.lookup("foobarbaz", false, 5).toString());
    assertEquals("[barbaz/60, barbazfoo/10]", suggester.lookup("bar baz", false, 5).toString());
    assertEquals("[barbazfoo/10]", suggester.lookup("bar baz foo", false, 5).toString());
    IOUtils.close(a, tempDir);
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) Directory(org.apache.lucene.store.Directory)

Example 74 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testHighlight.

public void testHighlight() throws Exception {
    Input[] keys = new Input[] { new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz")) };
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newDirectory(), a, a, 3, false);
    suggester.build(new InputArrayIterator(keys));
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("penn", random()), 10, true, true);
    assertEquals(1, results.size());
    assertEquals("a penny saved is a penny earned", results.get(0).key);
    assertEquals("a <b>penn</b>y saved is a <b>penn</b>y earned", results.get(0).highlightKey);
    suggester.close();
    a.close();
}
Also used : Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Example 75 with MockAnalyzer

use of org.apache.lucene.analysis.MockAnalyzer in project lucene-solr by apache.

the class AnalyzingInfixSuggesterTest method testContextNotAllTermsRequired.

public void testContextNotAllTermsRequired() throws Exception {
    Input[] keys = new Input[] { new Input("lend me your ear", 8, new BytesRef("foobar"), asSet("foo", "bar")), new Input("a penny saved is a penny earned", 10, new BytesRef("foobaz"), asSet("foo", "baz")) };
    Path tempDir = createTempDir("analyzingInfixContext");
    Analyzer a = new MockAnalyzer(random(), MockTokenizer.WHITESPACE, false);
    AnalyzingInfixSuggester suggester = new AnalyzingInfixSuggester(newFSDirectory(tempDir), a, a, 3, false);
    suggester.build(new InputArrayIterator(keys));
    // No context provided, all results returned
    List<LookupResult> results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), 10, false, true);
    assertEquals(2, results.size());
    LookupResult result = results.get(0);
    assertEquals("a penny saved is a penny earned", result.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
    assertEquals(10, result.value);
    assertEquals(new BytesRef("foobaz"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("baz")));
    result = results.get(1);
    assertEquals("lend me your ear", result.key);
    assertEquals("lend me your <b>ear</b>", result.highlightKey);
    assertEquals(8, result.value);
    assertEquals(new BytesRef("foobar"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("bar")));
    // Both have "foo" context:
    results = suggester.lookup(TestUtil.stringToCharSequence("ear", random()), asSet("foo"), 10, false, true);
    assertEquals(2, results.size());
    result = results.get(0);
    assertEquals("a penny saved is a penny earned", result.key);
    assertEquals("a penny saved is a penny <b>ear</b>ned", result.highlightKey);
    assertEquals(10, result.value);
    assertEquals(new BytesRef("foobaz"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("baz")));
    result = results.get(1);
    assertEquals("lend me your ear", result.key);
    assertEquals("lend me your <b>ear</b>", result.highlightKey);
    assertEquals(8, result.value);
    assertEquals(new BytesRef("foobar"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("bar")));
    // Only one has "foo" context and len
    results = suggester.lookup(TestUtil.stringToCharSequence("len", random()), asSet("foo"), 10, false, true);
    assertEquals(1, results.size());
    result = results.get(0);
    assertEquals("lend me your ear", result.key);
    assertEquals("<b>len</b>d me your ear", result.highlightKey);
    assertEquals(8, result.value);
    assertEquals(new BytesRef("foobar"), result.payload);
    assertNotNull(result.contexts);
    assertEquals(2, result.contexts.size());
    assertTrue(result.contexts.contains(new BytesRef("foo")));
    assertTrue(result.contexts.contains(new BytesRef("bar")));
    suggester.close();
}
Also used : Path(java.nio.file.Path) Input(org.apache.lucene.search.suggest.Input) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) InputArrayIterator(org.apache.lucene.search.suggest.InputArrayIterator) LookupResult(org.apache.lucene.search.suggest.Lookup.LookupResult) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) BytesRef(org.apache.lucene.util.BytesRef)

Aggregations

MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)1164 Directory (org.apache.lucene.store.Directory)785 Document (org.apache.lucene.document.Document)775 RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter)265 Analyzer (org.apache.lucene.analysis.Analyzer)259 BytesRef (org.apache.lucene.util.BytesRef)252 StringField (org.apache.lucene.document.StringField)183 Term (org.apache.lucene.index.Term)183 RAMDirectory (org.apache.lucene.store.RAMDirectory)168 NumericDocValuesField (org.apache.lucene.document.NumericDocValuesField)165 Field (org.apache.lucene.document.Field)164 TextField (org.apache.lucene.document.TextField)159 Test (org.junit.Test)142 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)136 IndexReader (org.apache.lucene.index.IndexReader)134 IndexWriter (org.apache.lucene.index.IndexWriter)133 TermQuery (org.apache.lucene.search.TermQuery)121 FieldType (org.apache.lucene.document.FieldType)119 SortedDocValuesField (org.apache.lucene.document.SortedDocValuesField)116 IndexSearcher (org.apache.lucene.search.IndexSearcher)111