Use of org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter in project lucene-solr by Apache.
From the class TestWordDelimiterGraphFilter, method testTokenType.
/**
 * Feeds a single "foo-bar" token whose type is "mytype" through the filter and
 * expects the three emitted terms to each carry that same type.
 */
public void testTokenType() throws Exception {
  final int splitFlags = GENERATE_WORD_PARTS
      | GENERATE_NUMBER_PARTS
      | CATENATE_ALL
      | SPLIT_ON_CASE_CHANGE
      | SPLIT_ON_NUMERICS
      | STEM_ENGLISH_POSSESSIVE;

  // Input token with a custom type attached.
  final Token typedToken = new Token("foo-bar", 5, 12);
  typedToken.setType("mytype");

  // Expected terms and, position by position, the type expected for each.
  final String[] expectedTerms = { "foobar", "foo", "bar" };
  final String[] expectedTypes = { "mytype", "mytype", "mytype" };

  WordDelimiterGraphFilter filter = new WordDelimiterGraphFilter(
      new CannedTokenStream(typedToken), DEFAULT_WORD_DELIM_TABLE, splitFlags, null);
  assertTokenStreamContents(filter, expectedTerms, expectedTypes);
}
Use of org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter in project lucene-solr by Apache.
From the class TestWordDelimiterGraphFilter, method doSplitPossessive.
/**
 * Runs {@code input} through a WordDelimiterGraphFilter and asserts that the
 * emitted terms equal {@code output}.
 *
 * @param stemPossessive when equal to 1, STEM_ENGLISH_POSSESSIVE is added to the flags;
 *                       any other value leaves it off
 * @param input          text handed to keywordMockTokenizer to produce the source stream
 * @param output         the terms the filter is expected to emit, in order
 */
public void doSplitPossessive(int stemPossessive, final String input, final String... output) throws Exception {
  int filterFlags = GENERATE_WORD_PARTS
      | GENERATE_NUMBER_PARTS
      | SPLIT_ON_CASE_CHANGE
      | SPLIT_ON_NUMERICS;
  if (stemPossessive == 1) {
    filterFlags |= STEM_ENGLISH_POSSESSIVE;
  }

  WordDelimiterGraphFilter filter =
      new WordDelimiterGraphFilter(keywordMockTokenizer(input), filterFlags, null);
  assertTokenStreamContents(filter, output);
}
Use of org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter in project lucene-solr by Apache.
From the class TestWordDelimiterGraphFilter, method testOffsetChange3.
/**
 * Input token "(übelkeit" with offsets 7..16: the offset span (9) equals the
 * term length, and the test expects the single emitted term "übelkeit" to be
 * reported at 8..16.
 */
public void testOffsetChange3() throws Exception {
  final int splitFlags = GENERATE_WORD_PARTS
      | GENERATE_NUMBER_PARTS
      | CATENATE_ALL
      | SPLIT_ON_CASE_CHANGE
      | SPLIT_ON_NUMERICS
      | STEM_ENGLISH_POSSESSIVE;

  final Token source = new Token("(übelkeit", 7, 16);
  WordDelimiterGraphFilter filter = new WordDelimiterGraphFilter(
      new CannedTokenStream(source), DEFAULT_WORD_DELIM_TABLE, splitFlags, null);

  final String[] expectedTerms = { "übelkeit" };
  final int[] expectedStarts = { 8 };
  final int[] expectedEnds = { 16 };
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
}
Use of org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter in project lucene-solr by Apache.
From the class TestWordDelimiterGraphFilter, method testOffsetChange.
/**
 * Input token "übelkeit)" with offsets 7..16: the offset span (9) equals the
 * term length, and the test expects the single emitted term "übelkeit" to be
 * reported at 7..15.
 */
public void testOffsetChange() throws Exception {
  final int splitFlags = GENERATE_WORD_PARTS
      | GENERATE_NUMBER_PARTS
      | CATENATE_ALL
      | SPLIT_ON_CASE_CHANGE
      | SPLIT_ON_NUMERICS
      | STEM_ENGLISH_POSSESSIVE;

  final Token source = new Token("übelkeit)", 7, 16);
  WordDelimiterGraphFilter filter = new WordDelimiterGraphFilter(
      new CannedTokenStream(source), DEFAULT_WORD_DELIM_TABLE, splitFlags, null);

  final String[] expectedTerms = { "übelkeit" };
  final int[] expectedStarts = { 7 };
  final int[] expectedEnds = { 15 };
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
}
Use of org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilter in project lucene-solr by Apache.
From the class TestWordDelimiterGraphFilter, method testOffsetChange2.
/**
 * Input token "(übelkeit" with offsets 7..17. The offsets are "illegal" in the
 * sense that their span (10) does not match the 9-character term; the test
 * expects the emitted term "übelkeit" to be reported with the input offsets
 * 7..17 unchanged.
 */
public void testOffsetChange2() throws Exception {
  final int splitFlags = GENERATE_WORD_PARTS
      | GENERATE_NUMBER_PARTS
      | CATENATE_ALL
      | SPLIT_ON_CASE_CHANGE
      | SPLIT_ON_NUMERICS
      | STEM_ENGLISH_POSSESSIVE;

  final Token source = new Token("(übelkeit", 7, 17);
  WordDelimiterGraphFilter filter = new WordDelimiterGraphFilter(
      new CannedTokenStream(source), DEFAULT_WORD_DELIM_TABLE, splitFlags, null);

  // Illegal offsets on the input: expected values mirror them as-is.
  final String[] expectedTerms = { "übelkeit" };
  final int[] expectedStarts = { 7 };
  final int[] expectedEnds = { 17 };
  assertTokenStreamContents(filter, expectedTerms, expectedStarts, expectedEnds);
}
Aggregations