Example 31 with MockTokenizer

Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

From class TestDelimitedPayloadTokenFilterFactory, method testDelim.

public void testDelim() throws Exception {
    Reader reader = new StringReader("the*0.1 quick*0.1 red*0.1");
    TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    ((Tokenizer) stream).setReader(reader);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float", "delimiter", "*").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.end();
    stream.close();
}
Also used: MockTokenizer (org.apache.lucene.analysis.MockTokenizer), TokenStream (org.apache.lucene.analysis.TokenStream), PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), StringReader (java.io.StringReader), Reader (java.io.Reader), Tokenizer (org.apache.lucene.analysis.Tokenizer)
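
For comparison, the same behaviour can be wired up without the factory layer by constructing the filter directly. This is a hedged sketch, not part of the test above: it assumes DelimitedPayloadTokenFilter, FloatEncoder and PayloadHelper from org.apache.lucene.analysis.payloads, and the method name testDelimDirect is made up for illustration.

import java.io.StringReader;
import org.apache.lucene.analysis.MockTokenizer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.payloads.DelimitedPayloadTokenFilter;
import org.apache.lucene.analysis.payloads.FloatEncoder;
import org.apache.lucene.analysis.payloads.PayloadHelper;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.PayloadAttribute;

// Sketch: build the filter chain by hand instead of via tokenFilterFactory(...).
public void testDelimDirect() throws Exception {
    Tokenizer tok = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    tok.setReader(new StringReader("the*0.1 quick*0.1 red*0.1"));
    TokenStream stream = new DelimitedPayloadTokenFilter(tok, '*', new FloatEncoder());
    CharTermAttribute termAttr = stream.getAttribute(CharTermAttribute.class);
    PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
    stream.reset();
    while (stream.incrementToken()) {
        // the delimiter and the trailing "0.1" are stripped from the term text
        // and stored as a 4-byte float payload instead
        assertFalse(termAttr.toString().contains("*"));
        assertEquals(0.1f, PayloadHelper.decodeFloat(payAttr.getPayload().bytes), 0.0f);
    }
    stream.end();
    stream.close();
}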

Example 32 with MockTokenizer

Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

From class TestDelimitedPayloadTokenFilterFactory, method testEncoder.

public void testEncoder() throws Exception {
    Reader reader = new StringReader("the|0.1 quick|0.1 red|0.1");
    TokenStream stream = new MockTokenizer(MockTokenizer.WHITESPACE, false);
    ((Tokenizer) stream).setReader(reader);
    stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(stream);
    stream.reset();
    while (stream.incrementToken()) {
        PayloadAttribute payAttr = stream.getAttribute(PayloadAttribute.class);
        assertNotNull(payAttr);
        byte[] payData = payAttr.getPayload().bytes;
        assertNotNull(payData);
        float payFloat = PayloadHelper.decodeFloat(payData);
        assertEquals(0.1f, payFloat, 0.0f);
    }
    stream.end();
    stream.close();
}
Also used: MockTokenizer (org.apache.lucene.analysis.MockTokenizer), TokenStream (org.apache.lucene.analysis.TokenStream), PayloadAttribute (org.apache.lucene.analysis.tokenattributes.PayloadAttribute), StringReader (java.io.StringReader), Reader (java.io.Reader), Tokenizer (org.apache.lucene.analysis.Tokenizer)
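
Because '|' is DelimitedPayloadTokenFilter's default delimiter, the factory call above only needs the encoder argument. As a hedged illustration (assuming the test class extends Lucene's BaseTokenStreamTestCase, whose assertTokenStreamContents resets, consumes and closes the stream itself), the surviving term texts could be checked like this:

Tokenizer tok = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tok.setReader(new StringReader("the|0.1 quick|0.1 red|0.1"));
TokenStream stream = tokenFilterFactory("DelimitedPayload", "encoder", "float").create(tok);
// the "|0.1" suffixes become payloads, so only the bare words remain as term text
assertTokenStreamContents(stream, new String[] { "the", "quick", "red" });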

Example 33 with MockTokenizer

Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

From class TestRussianLightStemFilter, method testKeyword.

public void testKeyword() throws IOException {
    final CharArraySet exclusionSet = new CharArraySet(asSet("энергии"), false);
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            TokenStream sink = new SetKeywordMarkerFilter(source, exclusionSet);
            return new TokenStreamComponents(source, new RussianLightStemFilter(sink));
        }
    };
    checkOneTerm(a, "энергии", "энергии");
    a.close();
}
Also used: MockTokenizer (org.apache.lucene.analysis.MockTokenizer), CharArraySet (org.apache.lucene.analysis.CharArraySet), TokenStream (org.apache.lucene.analysis.TokenStream), SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter), Analyzer (org.apache.lucene.analysis.Analyzer), Tokenizer (org.apache.lucene.analysis.Tokenizer), KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)
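
The exclusion works because SetKeywordMarkerFilter sets KeywordAttribute on matching tokens, and RussianLightStemFilter, like other Lucene stemmers, leaves keyword-marked tokens untouched. A small hedged sketch, separate from the test above, that only inspects the flag (it additionally assumes java.util.Arrays plus CharTermAttribute and KeywordAttribute from org.apache.lucene.analysis.tokenattributes):

Tokenizer tok = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tok.setReader(new StringReader("энергия энергии"));
CharArraySet protectedWords = new CharArraySet(Arrays.asList("энергии"), false);
TokenStream stream = new SetKeywordMarkerFilter(tok, protectedWords);
CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
KeywordAttribute keyword = stream.getAttribute(KeywordAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    // only the protected form "энергии" is marked, so only it would bypass the stemmer
    System.out.println(term + " -> keyword=" + keyword.isKeyword());
}
stream.end();
stream.close();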

Example 34 with MockTokenizer

Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

From class ShingleFilterTest, method testRandomHugeStrings.

/** blast some random large strings through the analyzer */
public void testRandomHugeStrings() throws Exception {
    Random random = random();
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
        }
    };
    checkRandomData(random, a, 100 * RANDOM_MULTIPLIER, 8192);
    a.close();
}
Also used: MockTokenizer (org.apache.lucene.analysis.MockTokenizer), Random (java.util.Random), Analyzer (org.apache.lucene.analysis.Analyzer), WhitespaceTokenizer (org.apache.lucene.analysis.core.WhitespaceTokenizer), Tokenizer (org.apache.lucene.analysis.Tokenizer), KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)
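
For context on what the random strings are pushed through: with its defaults (shingle size 2, unigrams enabled, a single space as separator), ShingleFilter emits every token plus each overlapping word pair. A hedged sketch of the expected output shape, reusing the analyzer from the test above with BaseTokenStreamTestCase's assertAnalyzesTo:

Analyzer a = new Analyzer() {

    @Override
    protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        return new TokenStreamComponents(tokenizer, new ShingleFilter(tokenizer));
    }
};
// unigrams interleaved with the overlapping bigrams they start
assertAnalyzesTo(a, "please divide this",
    new String[] { "please", "please divide", "divide", "divide this", "this" });
a.close();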

Example 35 with MockTokenizer

Use of org.apache.lucene.analysis.MockTokenizer in project lucene-solr by apache.

From class TestDaitchMokotoffSoundexFilter, method testRandomStrings.

/** blast some random strings through the analyzer */
public void testRandomStrings() throws IOException {
    Analyzer a = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer, new DaitchMokotoffSoundexFilter(tokenizer, false));
        }
    };
    checkRandomData(random(), a, 1000 * RANDOM_MULTIPLIER);
    a.close();
    Analyzer b = new Analyzer() {

        @Override
        protected TokenStreamComponents createComponents(String fieldName) {
            Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
            return new TokenStreamComponents(tokenizer, new DaitchMokotoffSoundexFilter(tokenizer, false));
        }
    };
    checkRandomData(random(), b, 1000 * RANDOM_MULTIPLIER);
    b.close();
}
Also used: MockTokenizer (org.apache.lucene.analysis.MockTokenizer), Analyzer (org.apache.lucene.analysis.Analyzer), WhitespaceTokenizer (org.apache.lucene.analysis.core.WhitespaceTokenizer), Tokenizer (org.apache.lucene.analysis.Tokenizer), KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer)
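
The boolean passed to DaitchMokotoffSoundexFilter controls injection: with false (as above) each term is replaced by its Daitch-Mokotoff soundex code(s), while true keeps the original token and stacks the code(s) at the same position. A hedged sketch, separate from the test, that just prints what the filter emits (it assumes CharTermAttribute and PositionIncrementAttribute from org.apache.lucene.analysis.tokenattributes):

Tokenizer tok = new MockTokenizer(MockTokenizer.WHITESPACE, false);
tok.setReader(new StringReader("international"));
// inject = true: emit the soundex code(s) in addition to the original token
TokenStream stream = new DaitchMokotoffSoundexFilter(tok, true);
CharTermAttribute term = stream.getAttribute(CharTermAttribute.class);
PositionIncrementAttribute posInc = stream.getAttribute(PositionIncrementAttribute.class);
stream.reset();
while (stream.incrementToken()) {
    // injected codes carry a position increment of 0, i.e. they stack on the original term
    System.out.println(term + " (posInc=" + posInc.getPositionIncrement() + ")");
}
stream.end();
stream.close();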

Aggregations

MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 280
Tokenizer (org.apache.lucene.analysis.Tokenizer): 204
Analyzer (org.apache.lucene.analysis.Analyzer): 161
StringReader (java.io.StringReader): 118
TokenStream (org.apache.lucene.analysis.TokenStream): 116
KeywordTokenizer (org.apache.lucene.analysis.core.KeywordTokenizer): 106
Reader (java.io.Reader): 59
MockAnalyzer (org.apache.lucene.analysis.MockAnalyzer): 54
CharArraySet (org.apache.lucene.analysis.CharArraySet): 44
Directory (org.apache.lucene.store.Directory): 36
Document (org.apache.lucene.document.Document): 31
BytesRef (org.apache.lucene.util.BytesRef): 25
SetKeywordMarkerFilter (org.apache.lucene.analysis.miscellaneous.SetKeywordMarkerFilter): 21
TextField (org.apache.lucene.document.TextField): 20
CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 18
Field (org.apache.lucene.document.Field): 17
FieldType (org.apache.lucene.document.FieldType): 14
StringField (org.apache.lucene.document.StringField): 11
Input (org.apache.lucene.search.suggest.Input): 11
InputArrayIterator (org.apache.lucene.search.suggest.InputArrayIterator): 11