Use of org.apache.lucene.analysis.Token in the apache/lucene-solr project.
Class SpellingQueryConverterTest, method testSpecialChars.
/**
 * Verifies that queries whose field names and values contain special characters
 * (underscores, digits, hyphens, a boost marker, parentheses) are converted into
 * the expected number of tokens, and that each token's offsets point back at the
 * matching slice of the original query string.
 */
@Test
public void testSpecialChars() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList<>());
  converter.setAnalyzer(new WhitespaceAnalyzer());

  assertSingleTokenWithCorrectOffsets(converter, "field_with_underscore:value_with_underscore");
  assertSingleTokenWithCorrectOffsets(converter, "field_with_digits123:value_with_digits123");
  assertSingleTokenWithCorrectOffsets(converter, "field-with-hyphens:value-with-hyphens");
  // mix 'em up and add some to the value
  // NOTE(review): this mixed-separator case is disabled; presumably it does not
  // currently convert to a single token — confirm intent before re-enabling.
  // assertSingleTokenWithCorrectOffsets(converter, "field_with-123s:value_,.|with-hyphens");
  assertSingleTokenWithCorrectOffsets(converter, "foo:bar^5.0");

  // A parenthesized value with two keywords must yield two tokens, in order.
  String firstKeyword = "value1";
  String secondKeyword = "value2";
  String original = "field-with-parenthesis:(" + firstKeyword + " " + secondKeyword + ")";
  Collection<Token> tokens = converter.convert(original);
  assertNotNull("tokens is null and it shouldn't be", tokens);
  assertEquals("tokens Size: " + tokens.size() + " is not 2", 2, tokens.size());
  assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
  List<Token> tokenList = new ArrayList<>(tokens);
  assertEquals("first Token is not " + firstKeyword, firstKeyword, tokenList.get(0).toString());
  assertEquals("second Token is not " + secondKeyword, secondKeyword, tokenList.get(1).toString());
}

/**
 * Asserts that {@code original} converts to exactly one token whose offsets
 * match the corresponding slice of {@code original}.
 */
private void assertSingleTokenWithCorrectOffsets(SpellingQueryConverter converter, String original) {
  Collection<Token> tokens = converter.convert(original);
  assertNotNull("tokens is null and it shouldn't be", tokens);
  assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
  assertTrue("Token offsets do not match", isOffsetCorrect(original, tokens));
}
Use of org.apache.lucene.analysis.Token in the apache/lucene-solr project.
Class SpellingQueryConverterTest, method test.
/**
 * Sanity check: a simple {@code field:value} query converts to exactly one token.
 */
@Test
public void test() throws Exception {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList<>());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  Collection<Token> tokens = converter.convert("field:foo");
  assertNotNull("tokens is null and it shouldn't be", tokens);
  // assertEquals gives a useful expected-vs-actual message on failure,
  // unlike the original assertTrue(size == 1).
  assertEquals("tokens Size: " + tokens.size() + " is not: " + 1, 1, tokens.size());
}
Use of org.apache.lucene.analysis.Token in the apache/lucene-solr project.
Class SpellingQueryConverterTest, method isOffsetCorrect.
/**
 * Returns {@code true} when, for every token, the substring of {@code s} between
 * the token's start and end offsets equals the token's text; returns
 * {@code false} on the first mismatch.
 */
private boolean isOffsetCorrect(String s, Collection<Token> tokens) {
  for (Token tok : tokens) {
    String slice = s.substring(tok.startOffset(), tok.endOffset());
    if (!slice.equals(tok.toString())) {
      return false;
    }
  }
  return true;
}
Use of org.apache.lucene.analysis.Token in the apache/lucene-solr project.
Class SpellingQueryConverterTest, method testUnicode.
/**
 * Verifies that queries mixing CJK characters with ASCII — in the value, in the
 * field name, or interleaved inside the value — each convert to a single token.
 */
@Test
public void testUnicode() {
  SpellingQueryConverter converter = new SpellingQueryConverter();
  converter.init(new NamedList<>());
  converter.setAnalyzer(new WhitespaceAnalyzer());
  String[] queries = {
      "text_field:我购买了道具和服装。",   // chinese text value
      "text_购field:我购买了道具和服装。", // CJK character inside the field name
      "text_field:我购xyz买了道具和服装。" // ASCII interleaved in the CJK value
  };
  for (String query : queries) {
    Collection<Token> tokens = converter.convert(query);
    assertNotNull("tokens is null and it shouldn't be", tokens);
    assertEquals("tokens Size: " + tokens.size() + " is not 1", 1, tokens.size());
  }
}
Use of org.apache.lucene.analysis.Token in the apache/lucene-solr project.
Class TestLegacyFieldReuse, method testNumericReuse.
/**
 * Verifies LegacyIntField.tokenStream reuse semantics: a null previous stream
 * yields a fresh LegacyNumericTokenStream with the default precision step; a
 * compatible previous stream is reused as-is; an incompatible stream (non-numeric,
 * or numeric with a different precision step) is replaced with a new one.
 *
 * @throws IOException if consuming a token stream fails
 */
public void testNumericReuse() throws IOException {
  LegacyIntField legacyIntField = new LegacyIntField("foo", 5, Field.Store.NO);

  // passing null: a fresh numeric stream with the default precision step is created
  TokenStream ts = legacyIntField.tokenStream(null, null);
  assertTrue(ts instanceof LegacyNumericTokenStream);
  assertEquals(LegacyNumericUtils.PRECISION_STEP_DEFAULT_32, ((LegacyNumericTokenStream) ts).getPrecisionStep());
  assertNumericContents(5, ts);

  // now reuse previous stream
  legacyIntField = new LegacyIntField("foo", 20, Field.Store.NO);
  TokenStream ts2 = legacyIntField.tokenStream(null, ts);
  assertSame(ts, ts2);
  assertNumericContents(20, ts);

  // pass a bogus stream and ensure it's still ok
  legacyIntField = new LegacyIntField("foo", 2343, Field.Store.NO);
  TokenStream bogus = new CannedTokenStream(new Token("bogus", 0, 5));
  ts = legacyIntField.tokenStream(null, bogus);
  assertNotSame(bogus, ts);
  assertNumericContents(2343, ts);

  // pass another bogus stream (numeric, but different precision step!)
  legacyIntField = new LegacyIntField("foo", 42, Field.Store.NO);
  // was a bare `assert`, which is silently skipped unless the JVM runs with -ea;
  // use a real test assertion so the precondition is always checked
  assertTrue(3 != LegacyNumericUtils.PRECISION_STEP_DEFAULT);
  bogus = new LegacyNumericTokenStream(3);
  ts = legacyIntField.tokenStream(null, bogus);
  assertNotSame(bogus, ts);
  assertNumericContents(42, ts);
}
Aggregations