use of org.apache.lucene.analysis.CharArraySet in project elasticsearch by elastic.
the class AnalysisTests method testParseStemExclusion.
/**
 * Checks that {@code Analysis.parseStemExclusion} yields a set containing
 * "foo" and "bar" (and not "baz") when the {@code stem_exclusion} setting is
 * given either as one comma-separated string or as an array of values.
 */
public void testParseStemExclusion() {
    // Variant 1: the words are supplied as a single comma-separated string.
    final Settings commaSeparated = Settings.builder()
        .put("stem_exclusion", "foo,bar")
        .build();
    final CharArraySet fromString = Analysis.parseStemExclusion(commaSeparated, CharArraySet.EMPTY_SET);
    assertThat(fromString.contains("foo"), is(true));
    assertThat(fromString.contains("bar"), is(true));
    assertThat(fromString.contains("baz"), is(false));

    // Variant 2: the same words are supplied as an array setting.
    final Settings asArray = Settings.builder()
        .putArray("stem_exclusion", "foo", "bar")
        .build();
    final CharArraySet fromArray = Analysis.parseStemExclusion(asArray, CharArraySet.EMPTY_SET);
    assertThat(fromArray.contains("foo"), is(true));
    assertThat(fromArray.contains("bar"), is(true));
    assertThat(fromArray.contains("baz"), is(false));
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestFreeTextSuggester method testEndingHole.
// With one ending hole, ShingleFilter produces "of _" and
// we should properly predict from that:
public void testEndingHole() throws Exception {
    // Analyzer whose only filtering step is a StopFilter that deletes "of".
    Analyzer a = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String field) {
            Tokenizer tokenizer = new MockTokenizer();
            CharArraySet stopSet = StopFilter.makeStopSet("of");
            return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
        }
    };
    try {
        Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("wizard of oz", 50));
        // The same analyzer is passed for both analyzer arguments.
        // NOTE(review): 3 and 0x20 are presumably the n-gram size and the
        // separator byte; confirm against the FreeTextSuggester constructor.
        FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
        sug.build(new InputArrayIterator(keys));

        // The removed "of" shows up as the hole marker "_" in the suggestion.
        assertEquals("wizard _ oz/1.00", toString(sug.lookup("wizard of", 10)));

        // Falls back to unigram model, with backoff 0.4 times
        // prop 0.5:
        assertEquals("oz/0.20", toString(sug.lookup("wizard o", 10)));
    } finally {
        // Close the analyzer even if building, lookup or an assertion fails.
        a.close();
    }
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestFreeTextSuggester method testTwoEndingHoles.
// If the number of ending holes exceeds the ngrams window
// then there are no predictions, because ShingleFilter
// does not produce e.g. a hole only "_ _" token:
public void testTwoEndingHoles() throws Exception {
    // Analyzer whose only filtering step is a StopFilter that deletes "of".
    Analyzer a = new Analyzer() {
        @Override
        public TokenStreamComponents createComponents(String field) {
            Tokenizer tokenizer = new MockTokenizer();
            CharArraySet stopSet = StopFilter.makeStopSet("of");
            return new TokenStreamComponents(tokenizer, new StopFilter(tokenizer, stopSet));
        }
    };
    try {
        Iterable<Input> keys = AnalyzingSuggesterTest.shuffle(new Input("wizard of of oz", 50));
        // The same analyzer is passed for both analyzer arguments.
        // NOTE(review): 3 and 0x20 are presumably the n-gram size and the
        // separator byte; confirm against the FreeTextSuggester constructor.
        FreeTextSuggester sug = new FreeTextSuggester(a, a, 3, (byte) 0x20);
        sug.build(new InputArrayIterator(keys));

        // Two trailing stop words leave two holes, so no suggestion is expected.
        assertEquals("", toString(sug.lookup("wizard of of", 10)));
    } finally {
        // Close the analyzer even if building, lookup or the assertion fails.
        a.close();
    }
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestSuggestStopFilter method testMultipleStopWordsEnd2.
/**
 * Runs SuggestStopFilter over "go to a the " (stop words followed by a
 * trailing space) with "to", "the" and "a" as stop words, and expects the
 * single token "go" at offsets 0-2 with a final offset of 12, which is the
 * length of the input string.
 */
public void testMultipleStopWordsEnd2() throws Exception {
    CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
    Tokenizer stream = new MockTokenizer();
    stream.setReader(new StringReader("go to a the "));
    // Build the filter exactly once; a second identical wrapper over the same
    // tokenizer would only replace this one and serve no purpose.
    TokenStream filter = new SuggestStopFilter(stream, stopWords);
    assertTokenStreamContents(filter, new String[] { "go" }, new int[] { 0 }, new int[] { 2 }, null, new int[] { 1 }, null, 12, new boolean[] { false }, true);
}
use of org.apache.lucene.analysis.CharArraySet in project lucene-solr by apache.
the class TestSuggestStopFilter method testMultipleStopWords.
/**
 * Runs SuggestStopFilter over "go to a the school" with "to", "the" and "a"
 * as stop words, and expects the tokens "go" (offsets 0-2) and "school"
 * (offsets 12-18) with a final offset of 18, which is the length of the
 * input string.
 */
public void testMultipleStopWords() throws Exception {
    CharArraySet stopWords = StopFilter.makeStopSet("to", "the", "a");
    Tokenizer stream = new MockTokenizer();
    stream.setReader(new StringReader("go to a the school"));
    // Build the filter exactly once; a second identical wrapper over the same
    // tokenizer would only replace this one and serve no purpose.
    TokenStream filter = new SuggestStopFilter(stream, stopWords);
    assertTokenStreamContents(filter, new String[] { "go", "school" }, new int[] { 0, 12 }, new int[] { 2, 18 }, null, new int[] { 1, 4 }, null, 18, new boolean[] { false, false }, true);
}
Aggregations