Examples with Token - org.apache.lucene.analysis.Token

Example 61 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class SpellingQueryConverterTest method testSpecialChars.

/**
 * Verifies that queries whose field names and values contain special characters
 * (underscores, digits, hyphens, a boost suffix, a parenthesised group) are
 * converted into the expected number of tokens, and that each token's offsets
 * point back at its own text in the original query string.
 */
@Test
public void testSpecialChars() {
    SpellingQueryConverter converter = new SpellingQueryConverter();
    converter.init(new NamedList());
    converter.setAnalyzer(new WhitespaceAnalyzer());

    assertConvertsTo(converter, "field_with_underscore:value_with_underscore", 1);
    assertConvertsTo(converter, "field_with_digits123:value_with_digits123", 1);
    assertConvertsTo(converter, "field-with-hyphens:value-with-hyphens", 1);

    // mix 'em up and add some to the value
    // NOTE(review): this case was already disabled in the original; the reason is not visible here.
    //    original = "field_with-123s:value_,.|with-hyphens";
    //    tokens = converter.convert(original);
    //    assertTrue("tokens is null and it shouldn't be", tokens != null);
    //    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
    //    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));

    // A boost suffix on the value must still yield a single token.
    assertConvertsTo(converter, "foo:bar^5.0", 1);

    String firstKeyword = "value1";
    String secondKeyword = "value2";
    Collection<Token> tokens =
        assertConvertsTo(converter, "field-with-parenthesis:(" + firstKeyword + " " + secondKeyword + ")", 2);

    // Copy once so the tokens can be addressed by position; assertEquals reports
    // the actual token text when the comparison fails.
    ArrayList<Token> ordered = new ArrayList<>(tokens);
    assertEquals("first Token is not " + firstKeyword, firstKeyword, ordered.get(0).toString());
    assertEquals("second Token is not " + secondKeyword, secondKeyword, ordered.get(1).toString());
}

/**
 * Converts {@code original} and asserts that the result is non-null, holds exactly
 * {@code expectedCount} tokens, and passes {@code isOffsetCorrect} against the query.
 *
 * @param converter     the configured converter under test
 * @param original      the raw query string to convert
 * @param expectedCount the number of tokens the conversion must produce
 * @return the converted tokens, for further checks by the caller
 */
private Collection<Token> assertConvertsTo(SpellingQueryConverter converter, String original, int expectedCount) {
    Collection<Token> tokens = converter.convert(original);
    assertTrue("tokens is null and it shouldn't be", tokens != null);
    assertEquals("tokens Size: " + tokens.size() + " is not " + expectedCount, expectedCount, tokens.size());
    assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
    return tokens;
}

Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) ArrayList(java.util.ArrayList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Example 62 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class SpellingQueryConverterTest method test.

/**
 * Smoke test: a plain {@code field:value} query run through a whitespace-analyzed
 * converter must produce a non-null collection holding exactly one token.
 */
@Test
public void test() throws Exception {
    final SpellingQueryConverter queryConverter = new SpellingQueryConverter();
    queryConverter.init(new NamedList());
    queryConverter.setAnalyzer(new WhitespaceAnalyzer());

    final Collection<Token> converted = queryConverter.convert("field:foo");
    assertTrue("tokens is null and it shouldn't be", converted != null);

    // Read the size only after the null check above has passed.
    final int count = converted.size();
    final String sizeMessage = "tokens Size: " + count + " is not: " + 1;
    assertTrue(sizeMessage, count == 1);
}

Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Example 63 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class SpellingQueryConverterTest method isOffsetCorrect.

/**
 * Checks that every token's start/end offsets select exactly that token's text
 * (as given by {@code token.toString()}) inside the original string.
 *
 * @param s      the original query string the tokens were produced from
 * @param tokens the tokens whose offsets are to be verified
 * @return {@code true} if all tokens map back onto their own text; {@code false}
 *         if any token's offsets are out of range, inverted, or select different text
 */
private boolean isOffsetCorrect(String s, Collection<Token> tokens) {
    for (Token token : tokens) {
        int start = token.startOffset();
        int end = token.endOffset();
        // Offsets outside the string are simply "not correct": report false so the
        // caller's assertion message is shown instead of a StringIndexOutOfBoundsException.
        if (start < 0 || end < start || end > s.length()) {
            return false;
        }
        if (!s.substring(start, end).equals(token.toString())) {
            return false;
        }
    }
    return true;
}

Also used : Token(org.apache.lucene.analysis.Token)

Example 64 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class SpellingQueryConverterTest method testUnicode.

/**
 * Verifies that queries containing non-ASCII (Chinese) characters, in the value
 * and in the field name, are each converted into exactly one token.
 */
@Test
public void testUnicode() {
    final SpellingQueryConverter queryConverter = new SpellingQueryConverter();
    queryConverter.init(new NamedList());
    queryConverter.setAnalyzer(new WhitespaceAnalyzer());

    final String[] queries = {
        // chinese text value
        "text_field:我购买了道具和服装。",
        // chinese character inside the field name as well
        "text_购field:我购买了道具和服装。",
        // latin letters embedded in the chinese value
        "text_field:我购xyz买了道具和服装。"
    };

    for (String query : queries) {
        Collection<Token> converted = queryConverter.convert(query);
        assertTrue("tokens is null and it shouldn't be", converted != null);
        assertEquals("tokens Size: " + converted.size() + " is not 1", 1, converted.size());
    }
}

Also used : WhitespaceAnalyzer(org.apache.lucene.analysis.core.WhitespaceAnalyzer) NamedList(org.apache.solr.common.util.NamedList) Token(org.apache.lucene.analysis.Token) Test(org.junit.Test)

Example 65 with Token

use of org.apache.lucene.analysis.Token in project lucene-solr by apache.

the class TestLegacyFieldReuse method testNumericReuse.

/**
 * Exercises {@code LegacyIntField.tokenStream(analyzer, reuse)} with different
 * {@code reuse} arguments: none, a previously returned numeric stream, a
 * non-numeric stream, and a numeric stream with a different precision step.
 * A compatible stream must be handed back as-is; an incompatible one must not.
 *
 * @throws IOException declared by the original; presumably raised by the stream checks
 */
public void testNumericReuse() throws IOException {
    LegacyIntField legacyIntField = new LegacyIntField("foo", 5, Field.Store.NO);
    // passing null: a fresh numeric stream with the 32-bit default precision step is expected
    TokenStream ts = legacyIntField.tokenStream(null, null);
    assertTrue(ts instanceof LegacyNumericTokenStream);
    assertEquals(LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, ((LegacyNumericTokenStream) ts).getPrecisionStep());
    assertNumericContents(5, ts);

    // now reuse previous stream: the very same instance must come back, carrying the new value
    legacyIntField = new LegacyIntField("foo", 20, Field.Store.NO);
    TokenStream ts2 = legacyIntField.tokenStream(null, ts);
    assertSame(ts, ts2);
    assertNumericContents(20, ts);

    // pass a bogus stream and ensure it's still ok
    legacyIntField = new LegacyIntField("foo", 2343, Field.Store.NO);
    TokenStream bogus = new CannedTokenStream(new Token("bogus", 0, 5));
    ts = legacyIntField.tokenStream(null, bogus);
    assertNotSame(bogus, ts);
    assertNumericContents(2343, ts);

    // pass another bogus stream (numeric, but different precision step!)
    legacyIntField = new LegacyIntField("foo", 42, Field.Store.NO);
    // Sanity check on the test itself: 3 must differ from the step this int field's
    // stream uses (PRECISION_STEP_DEFAULT_32, as asserted above), otherwise the
    // "bogus" stream would be legitimately reusable. A JUnit assertion is used so
    // the check runs even when JVM assertions are disabled.
    assertTrue(3 != LegacyNumericUtils.PRECISION_STEP_DEFAULT_32);
    bogus = new LegacyNumericTokenStream(3);
    ts = legacyIntField.tokenStream(null, bogus);
    assertNotSame(bogus, ts);
    assertNumericContents(42, ts);
}

Also used : LegacyNumericTokenStream(org.apache.solr.legacy.LegacyNumericTokenStream) CannedTokenStream(org.apache.lucene.analysis.CannedTokenStream) TokenStream(org.apache.lucene.analysis.TokenStream) Token(org.apache.lucene.analysis.Token) LegacyIntField(org.apache.solr.legacy.LegacyIntField)

Aggregations

Token (org.apache.lucene.analysis.Token): 100, CannedTokenStream (org.apache.lucene.analysis.CannedTokenStream): 39, TokenStream (org.apache.lucene.analysis.TokenStream): 31, Directory (org.apache.lucene.store.Directory): 24, Test (org.junit.Test): 23, Document (org.apache.lucene.document.Document): 19, TextField (org.apache.lucene.document.TextField): 19, BytesRef (org.apache.lucene.util.BytesRef): 16, NamedList (org.apache.solr.common.util.NamedList): 16, StringReader (java.io.StringReader): 15, CharTermAttribute (org.apache.lucene.analysis.tokenattributes.CharTermAttribute): 15, Analyzer (org.apache.lucene.analysis.Analyzer): 14, ArrayList (java.util.ArrayList): 13, Map (java.util.Map): 13, Field (org.apache.lucene.document.Field): 13, FieldType (org.apache.lucene.document.FieldType): 11, IndexReader (org.apache.lucene.index.IndexReader): 11, MockTokenizer (org.apache.lucene.analysis.MockTokenizer): 10, Tokenizer (org.apache.lucene.analysis.Tokenizer): 9, Date (java.util.Date): 8