Search in sources :

Example 66 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestKeepWordFilter method testRandomStrings.

/** blast some random strings through the analyzer */
public void testRandomStrings() throws Exception {
    final Set<String> words = new HashSet<>();
    words.add("a");
    words.add("b");
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            TokenStream stream = new KeepWordFilter(tokenizer, new CharArraySet(words, true));
            return new TokenStreamComponents(tokenizer, stream);
        }
    };
    checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) CharArraySet(org.apache.lucene.analysis.CharArraySet) TokenStream(org.apache.lucene.analysis.TokenStream) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) HashSet(java.util.HashSet)

Example 67 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestLengthFilterFactory method testPositionIncrements.

public void testPositionIncrements() throws Exception {
    Reader reader = new StringReader("foo foobar super-duper-trooper");
    TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    ((Tokenizer) stream).setReader(reader);
    stream = tokenFilterFactory("Length", LengthFilterFactory.MIN_KEY, "4", LengthFilterFactory.MAX_KEY, "10").create(stream);
    assertTokenStreamContents(stream, new String[] { "foobar" }, new int[] { 2 });
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) StringReader(java.io.StringReader) Reader(java.io.Reader) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer)

Example 68 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestLengthFilterFactory method testInvalidArguments.

/** Test that invalid arguments result in exception */
public void testInvalidArguments() throws Exception {
    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
        Reader reader = new StringReader("foo foobar super-duper-trooper");
        TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        ((Tokenizer) stream).setReader(reader);
        tokenFilterFactory("Length", LengthFilterFactory.MIN_KEY, "5", LengthFilterFactory.MAX_KEY, "4").create(stream);
    });
    assertTrue(expected.getMessage().contains("maximum length must not be greater than minimum length"));
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream) StringReader(java.io.StringReader) StringReader(java.io.StringReader) Reader(java.io.Reader) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer)

Example 69 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestCapitalizationFilter method testRandomString.

/** blast some random strings through the analyzer */
public void testRandomString() throws Exception {
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer, new CapitalizationFilter(tokenizer));
        }
    };
    checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
    a.close();
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) Analyzer(org.apache.lucene.analysis.Analyzer) Tokenizer(org.apache.lucene.analysis.Tokenizer) MockTokenizer(org.apache.lucene.analysis.MockTokenizer) KeywordTokenizer(org.apache.lucene.analysis.core.KeywordTokenizer) CapitalizationFilter(org.apache.lucene.analysis.miscellaneous.CapitalizationFilter)

Example 70 with MockTokenizer

use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

the class TestFingerprintFilter method testAllDupValues.

public void testAllDupValues() throws Exception {
    for (final boolean consumeAll : new boolean[] { true, false }) {
        MockTokenizer tokenizer = whitespaceMockTokenizer("B2 B2");
        tokenizer.setEnableChecks(consumeAll);
        TokenStream stream = new FingerprintFilter(tokenizer);
        assertTokenStreamContents(stream, new String[] { "B2" });
    }
}
Also used : MockTokenizer(org.apache.lucene.analysis.MockTokenizer) TokenStream(org.apache.lucene.analysis.TokenStream)

Aggregations

MockTokenizer (org.apache.lucene.analysis.MockTokenizer)280 Tokenizer (org.apache.lucene.analysis.Tokenizer)204 Analyzer (org.apache.lucene.analysis.Analyzer)161 StringReader (java.io.StringReader)118 TokenStream (org.apache.lucene.analysis.TokenStream)116 KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)106 Reader (java.io.Reader)59 MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer)54 CharArraySet (org.apache.lucene.analysis.CharArraySet)44 Directory (org.apache.lucene.store.Directory)36 Document (org.apache.lucene.document.Document)31 BytesRef (org.apache.lucene.util.BytesRef)25 SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter)21 TextField (org.apache.lucene.document.TextField)20 CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream)18 Field (org.apache.lucene.document.Field)17 FieldType (org.apache.lucene.document.FieldType)14 StringField (org.apache.lucene.document.StringField)11 Input (org.apache.lucene.search.suggest.Input)11 InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator)11