Search in sources :

Example 1 with MockGraphTokenFilter

Use of org.apache.lucene.analysis.MockGraphTokenFilter in the lucene-solr project by Apache.

From the class TestSynonymGraphFilter, method testRandomGraphAfter.

// Randomized test: places a MockGraphTokenFilter downstream of SynonymGraphFilter
// and, on a coin flip, wraps the result in a FlattenGraphFilter.
// Every round builds a fresh SynonymMap from random non-empty strings, runs the
// resulting analyzer through checkRandomData, and then closes the analyzer.
public void testRandomGraphAfter() throws Exception {
    for (int roundsLeft = atLeast(3); roundsLeft > 0; roundsLeft--) {
        // Builder flag and every rule below are drawn from the test's random source.
        final SynonymMap.Builder builder = new SynonymMap.Builder(random().nextBoolean());
        for (int rulesLeft = atLeast(10); rulesLeft > 0; rulesLeft--) {
            add(builder, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
        }
        final SynonymMap synonyms = builder.build();

        // Drawn in this order (case flag first, flatten flag second).
        final boolean matchIgnoringCase = random().nextBoolean();
        final boolean flatten = random().nextBoolean();

        final Analyzer chain = new Analyzer() {

            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                final Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
                final TokenStream withSynonyms = new SynonymGraphFilter(source, synonyms, matchIgnoringCase);
                // NOTE(review): random() is fetched on each call rather than captured
                // once outside -- presumably deliberate; confirm before hoisting it.
                final TokenStream withGraph = new MockGraphTokenFilter(random(), withSynonyms);
                final TokenStream sink = flatten ? new FlattenGraphFilter(withGraph) : withGraph;
                return new TokenStreamComponents(source, sink);
            }
        };

        checkRandomData(random(), chain, 100);
        chain.close();
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) CharsRefBuilder(org.apache.lucene.util.CharsRefBuilder) IntsRefBuilder(org.apache.lucene.util.IntsRefBuilder) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) FlattenGraphFilter(org.apache.lucene.analysis.core.FlattenGraphFilter) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) MockGraphTokenFilter(org.apache.lucene.analysis.MockGraphTokenFilter) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 2 with MockGraphTokenFilter

Use of org.apache.lucene.analysis.MockGraphTokenFilter in the lucene-solr project by Apache.

From the class TestStandardAnalyzer, method testRandomHugeStringsGraphAfter.

// Randomized test: StandardTokenizer followed by a MockGraphTokenFilter.
// The analyzer is passed to checkRandomData with 100 * RANDOM_MULTIPLIER and 8192
// as the trailing arguments, and is closed afterwards.
public void testRandomHugeStringsGraphAfter() throws Exception {
    final Random rnd = random();

    final Analyzer graphAfterStandard = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new StandardTokenizer(newAttributeFactory());
            // NOTE(review): uses a fresh random() call instead of the outer local --
            // presumably deliberate; confirm before replacing it with rnd.
            return new TokenStreamComponents(source, new MockGraphTokenFilter(random(), source));
        }
    };

    checkRandomData(rnd, graphAfterStandard, 100 * RANDOM_MULTIPLIER, 8192);
    graphAfterStandard.close();
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Random(java.util.Random) MockGraphTokenFilter(org.apache.lucene.analysis.MockGraphTokenFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Example 3 with MockGraphTokenFilter

Use of org.apache.lucene.analysis.MockGraphTokenFilter in the lucene-solr project by Apache.

From the class TestSynonymMapFilter, method testRandom2GraphAfter.

// NOTE: this is an invalid test... SynFilter today can't
// properly consume a graph... we can re-enable this once
// we fix that...
/*
  // Adds MockGraphTokenFilter before SynFilter:
  public void testRandom2GraphBefore() throws Exception {
    final int numIters = atLeast(10);
    Random random = random();
    for (int i = 0; i < numIters; i++) {
      b = new SynonymMap.Builder(random.nextBoolean());
      final int numEntries = atLeast(10);
      for (int j = 0; j < numEntries; j++) {
        add(randomNonEmptyString(), randomNonEmptyString(), random.nextBoolean());
      }
      final SynonymMap map = b.build();
      final boolean ignoreCase = random.nextBoolean();
      
      final Analyzer analyzer = new Analyzer() {
        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
          Tokenizer tokenizer = new MockTokenizer(reader, MockTokenizer.SIMPLE, true);
          TokenStream graph = new MockGraphTokenFilter(random(), tokenizer);
          return new TokenStreamComponents(tokenizer, new SynonymFilter(graph, map, ignoreCase));
        }
      };

      checkRandomData(random, analyzer, 1000*RANDOM_MULTIPLIER);
    }
  }
  */
// Randomized test: places a MockGraphTokenFilter downstream of SynonymFilter.
// Every round re-creates the shared builder `b`, fills it with random rules via
// add(...), builds the SynonymMap, runs the analyzer through checkRandomData and
// closes it.
public void testRandom2GraphAfter() throws Exception {
    final int rounds = atLeast(3);
    final Random rnd = random();

    for (int roundsLeft = rounds; roundsLeft > 0; roundsLeft--) {
        // `b` is declared outside this method; add(...) below takes no builder
        // argument, so it presumably writes into this same `b`.
        b = new SynonymMap.Builder(rnd.nextBoolean());
        for (int rulesLeft = atLeast(10); rulesLeft > 0; rulesLeft--) {
            add(randomNonEmptyString(), randomNonEmptyString(), rnd.nextBoolean());
        }
        final SynonymMap synonyms = b.build();
        final boolean matchIgnoringCase = rnd.nextBoolean();

        final Analyzer chain = new Analyzer() {

            @Override
            protected TokenStreamComponents createComponents(String fieldName) {
                final Tokenizer source = new MockTokenizer(MockTokenizer.SIMPLE, true);
                final TokenStream withSynonyms = new SynonymFilter(source, synonyms, matchIgnoringCase);
                // NOTE(review): random() is fetched on each call rather than reusing
                // rnd -- presumably deliberate; confirm before changing.
                return new TokenStreamComponents(source, new MockGraphTokenFilter(random(), withSynonyms));
            }
        };

        checkRandomData(rnd, chain, 100);
        chain.close();
    }
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Analyzer(org.apache.lucene.analysis.Analyzer) MockAnalyzer(org.apache.lucene.analysis.MockAnalyzer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Random(java.util.Random) MockGraphTokenFilter(org.apache.lucene.analysis.MockGraphTokenFilter) Tokenizer(org.apache.lucene.analysis.Tokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer)

Example 4 with MockGraphTokenFilter

Use of org.apache.lucene.analysis.MockGraphTokenFilter in the lucene-solr project by Apache.

From the class TestJapaneseTokenizer, method testRandomHugeStringsMockGraphAfter.

// Randomized test: JapaneseTokenizer (Mode.SEARCH) followed by a
// MockGraphTokenFilter that randomly injects graph tokens.
// The analyzer is passed to checkRandomData with 20 * RANDOM_MULTIPLIER and 8192
// as the trailing arguments, and is closed afterwards.
public void testRandomHugeStringsMockGraphAfter() throws Exception {
    final Random rnd = random();

    final Analyzer graphAfterJapanese = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            final Tokenizer source = new JapaneseTokenizer(newAttributeFactory(), readDict(), false, Mode.SEARCH);
            // NOTE(review): uses a fresh random() call instead of the outer local --
            // presumably deliberate; confirm before replacing it with rnd.
            return new TokenStreamComponents(source, new MockGraphTokenFilter(random(), source));
        }
    };

    checkRandomData(rnd, graphAfterJapanese, 20 * RANDOM_MULTIPLIER, 8192);
    graphAfterJapanese.close();
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) Random(java.util.Random) MockGraphTokenFilter(org.apache.lucene.analysis.MockGraphTokenFilter) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer)

Aggregations

Analyzer (org.apache.lucene.analysis.Analyzer)4 MockGraphTokenFilter (org.apache.lucene.analysis.MockGraphTokenFilter)4 TokenStream (org.apache.lucene.analysis.TokenStream)4 Tokenizer (org.apache.lucene.analysis.Tokenizer)4 Random (java.util.Random)3 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)2 MockTokenizer (org.apache.lucene.analysis.MockTokenizer)2 FlattenGraphFilter (org.apache.lucene.analysis.core.FlattenGraphFilter)1 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)1 CharsRefBuilder (org.apache.lucene.util.CharsRefBuilder)1 IntsRefBuilder (org.apache.lucene.util.IntsRefBuilder)1