Search in sources :

Example 96 with Analyzer

use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache.

the class TestSynonymGraphFilter method testNonPreservingMultiwordSynonyms.

/**
 * Verifies token types and position lengths for synonyms of different word counts when the
 * rules are explicit ({@code =>}) and non-preserving, i.e. the matched input is replaced by
 * its synonyms rather than kept alongside them.
 *
 * <p>NOTE(review): the positional array arguments passed to {@code assertAnalyzesTo} after the
 * expected terms appear to be start offsets, end offsets, token types, position increments and
 * position lengths, in that order; confirm against the helper's signature.
 *
 * @throws Exception if building the analyzer or analyzing any of the inputs fails
 */
public void testNonPreservingMultiwordSynonyms() throws Exception {
    String testFile = "aaa => two words\n" + "bbb => one two, very many multiple words\n" + "ee ff, gg, h i j k, h i => one\n" + "cc dd => usa,united states,u s a,united states of america";
    // try-with-resources: the analyzer is closed even when one of the assertions below fails.
    try (Analyzer analyzer = solrSynsToAnalyzer(testFile)) {
        // Single word replaced by a two-word synonym.
        assertAnalyzesTo(analyzer, "aaa", new String[] { "two", "words" }, new int[] { 0, 0 }, new int[] { 3, 3 }, new String[] { "SYNONYM", "SYNONYM" }, new int[] { 1, 1 }, new int[] { 1, 1 });
        // Same rule, preceded by a token that has no synonym rule.
        assertAnalyzesToPositions(analyzer, "amazing aaa", new String[] { "amazing", "two", "words" }, new String[] { "word", "SYNONYM", "SYNONYM" }, new int[] { 1, 1, 1 }, new int[] { 1, 1, 1 });
        // Single word replaced by two alternatives of different lengths ("one two" and "very many multiple words").
        assertAnalyzesTo(analyzer, "p bbb s", new String[] { "p", "one", "very", "two", "many", "multiple", "words", "s" }, new int[] { 0, 2, 2, 2, 2, 2, 2, 6 }, new int[] { 1, 5, 5, 5, 5, 5, 5, 7 }, new String[] { "word", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "SYNONYM", "word" }, new int[] { 1, 1, 0, 1, 0, 1, 1, 1 }, new int[] { 1, 1, 1, 3, 1, 1, 1, 1 });
        // Two-word input collapsed to a single synonym token.
        assertAnalyzesTo(analyzer, "p ee ff s", new String[] { "p", "one", "s" }, new int[] { 0, 2, 8 }, new int[] { 1, 7, 9 }, new String[] { "word", "SYNONYM", "word" }, new int[] { 1, 1, 1 }, new int[] { 1, 1, 1 });
        // Only "h i" is replaced here; the trailing "j" is expected to remain a plain "word" token.
        assertAnalyzesTo(analyzer, "p h i j s", new String[] { "p", "one", "j", "s" }, new int[] { 0, 2, 6, 8 }, new int[] { 1, 5, 7, 9 }, new String[] { "word", "SYNONYM", "word", "word" }, new int[] { 1, 1, 1, 1 }, new int[] { 1, 1, 1, 1 });
    }
}
Also used : Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 97 with Analyzer

use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache.

the class TestSynonymGraphFilter method testBuilderNoDedup.

/**
 * Adds the identical rule {@code "a b" -> "ab"} three times to a {@code SynonymMap.Builder}
 * constructed with {@code false} and expects the analyzer to emit three {@code "ab"} tokens
 * for the input {@code "a b"}, i.e. the duplicate rules are not collapsed.
 *
 * <p>NOTE(review): the {@code int[] { 1, 0, 0 }} argument is presumably the expected position
 * increments (all three tokens stacked at the same position); confirm against
 * {@code assertAnalyzesTo}.
 *
 * @throws Exception if building the analyzer or analyzing the input fails
 */
public void testBuilderNoDedup() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(false);
    final boolean keepOrig = false;
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    // try-with-resources: the analyzer is closed even when the assertion below fails.
    try (Analyzer a = getAnalyzer(b, true)) {
        assertAnalyzesTo(a, "a b", new String[] { "ab", "ab", "ab" }, new int[] { 1, 0, 0 });
    }
}
Also used : CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 98 with Analyzer

use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache.

the class TestSynonymGraphFilter method testVanishingTermsWithFlatten.

// LUCENE-3375
/**
 * Checks that tokens following a multi-word synonym replacement are not dropped: after
 * {@code "aaa"} or {@code "bbb"} is expanded, the trailing {@code "pot of gold"} tokens must
 * all still be present in the output.
 *
 * @throws Exception if building the analyzer or analyzing any of the inputs fails
 */
public void testVanishingTermsWithFlatten() throws Exception {
    String testFile = "aaa => aaaa1 aaaa2 aaaa3\n" + "bbb => bbbb1 bbbb2\n";
    // try-with-resources: the analyzer is closed even when one of the assertions below fails.
    try (Analyzer analyzer = solrSynsToAnalyzer(testFile)) {
        // bbb expands to two tokens; "pot", "of" and "gold" must all follow.
        assertAnalyzesTo(analyzer, "xyzzy bbb pot of gold", new String[] { "xyzzy", "bbbb1", "bbbb2", "pot", "of", "gold" });
        // aaa expands to three tokens; "pot" and "of" must NOT vanish, so the expected output is
        // xyzzy aaaa1 aaaa2 aaaa3 pot of gold (not "xyzzy aaaa1 aaaa2 aaaa3 gold").
        assertAnalyzesTo(analyzer, "xyzzy aaa pot of gold", new String[] { "xyzzy", "aaaa1", "aaaa2", "aaaa3", "pot", "of", "gold" });
    }
}
Also used : Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 99 with Analyzer

use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache.

the class TestSynonymGraphFilter method testOutputHangsOffEnd.

/**
 * Maps the single input token {@code "a"} to the two-token output {@code "a b"} and checks the
 * tokens produced for the input {@code "a"}: the second output token {@code "b"} has no input
 * token underneath it, and both output tokens are expected to carry offsets 0..1.
 *
 * <p>NOTE(review): the {@code null} argument presumably means token types are not checked, and
 * the meaning of the trailing {@code true} flag is not visible here; confirm both against the
 * {@code assertAnalyzesTo} overload being called.
 *
 * @throws Exception if building the analyzer or analyzing the input fails
 */
public void testOutputHangsOffEnd() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(true);
    final boolean keepOrig = false;
    // b hangs off the end (no input token under it):
    add(b, "a", "a b", keepOrig);
    // try-with-resources: the analyzer is closed even when the assertion below fails.
    try (Analyzer a = getFlattenAnalyzer(b, true)) {
        assertAnalyzesTo(a, "a", new String[] { "a", "b" }, new int[] { 0, 0 }, new int[] { 1, 1 }, null, new int[] { 1, 1 }, new int[] { 1, 1 }, true);
    }
}
Also used : CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Example 100 with Analyzer

use of org.apache.lucene.analysis.Analyzer in project lucene-solr by apache.

the class TestSynonymGraphFilter method testDedup.

/**
 * Adds the identical rule {@code "a b" -> "ab"} three times to a {@code SynonymMap.Builder}
 * constructed with {@code true} and expects the analyzer to emit a single {@code "ab"} token
 * for the input {@code "a b"}, i.e. the duplicate rules are collapsed into one.
 *
 * <p>NOTE(review): the {@code int[] { 1 }} argument is presumably the expected position
 * increment of the single token; confirm against {@code assertAnalyzesTo}.
 *
 * @throws Exception if building the analyzer or analyzing the input fails
 */
public void testDedup() throws Exception {
    SynonymMap.Builder b = new SynonymMap.Builder(true);
    final boolean keepOrig = false;
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    add(b, "a b", "ab", keepOrig);
    // try-with-resources: the analyzer is closed even when the assertion below fails.
    try (Analyzer a = getFlattenAnalyzer(b, true)) {
        assertAnalyzesTo(a, "a b", new String[] { "ab" }, new int[] { 1 });
    }
}
Also used : CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer)

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer) 1020, MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer) 396, Tokenizer (org.apache.lucene.analysis.Tokenizer) 265, MockTokenizer (org.apache.lucene.analysis.MockTokenizer) 228, Document (org.apache.lucene.document.Document) 207, Directory (org.apache.lucene.store.Directory) 192, KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer) 176, BytesRef (org.apache.lucene.util.BytesRef) 122, Test (org.junit.Test) 119, TokenStream (org.apache.lucene.analysis.TokenStream) 107, RandomIndexWriter (org.apache.lucene.index.RandomIndexWriter) 92, Term (org.apache.lucene.index.Term) 92, IndexReader (org.apache.lucene.index.IndexReader) 67, InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator) 65, StandardAnalyzer (org.apache.lucene.analysis.standard.StandardAnalyzer) 64, Input (org.apache.lucene.search.suggest.Input) 63, CharArraySet (org.apache.lucene.analysis.CharArraySet) 58, ArrayList (java.util.ArrayList) 57, IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig) 57, TextField (org.apache.lucene.document.TextField) 55