Example usage of org.apache.lucene.analysis.CharArraySet from the Apache lucene-solr project: class TestGalicianAnalyzer, method testExclude.
/**
 * Tests the stem-exclusion set: a word listed in the exclusion set must pass
 * through the analyzer unstemmed, while other words are still stemmed normally.
 */
public void testExclude() throws IOException {
  // case-sensitive exclusion set (ignoreCase = false)
  CharArraySet exclusionSet = new CharArraySet(asSet("correspondente"), false);
  // try-with-resources: the analyzer is closed even if an assertion fails
  try (Analyzer a = new GalicianAnalyzer(GalicianAnalyzer.getDefaultStopSet(), exclusionSet)) {
    checkOneTerm(a, "correspondente", "correspondente"); // excluded word kept verbatim
    checkOneTerm(a, "corresponderá", "correspond");      // non-excluded word is stemmed
  }
}
Example usage of org.apache.lucene.analysis.CharArraySet from the Apache lucene-solr project: class TestPorterStemFilter, method testWithKeywordAttribute.
/**
 * Verifies that a token marked as a keyword via SetKeywordMarkerFilter is not
 * altered by PorterStemFilter, while unmarked tokens are stemmed as usual.
 */
public void testWithKeywordAttribute() throws IOException {
  // ignore-case set holding the single word to protect from stemming
  CharArraySet keywords = new CharArraySet(1, true);
  keywords.add("yourselves");
  Tokenizer source = new MockTokenizer(MockTokenizer.WHITESPACE, false);
  source.setReader(new StringReader("yourselves yours"));
  TokenStream stemmed = new PorterStemFilter(new SetKeywordMarkerFilter(source, keywords));
  // "yourselves" is keyword-marked and left intact; "yours" is stemmed to "your"
  assertTokenStreamContents(stemmed, new String[] { "yourselves", "your" });
}
Example usage of org.apache.lucene.analysis.CharArraySet from the Apache lucene-solr project: class TestFinnishAnalyzer, method testExclude.
/**
 * Tests the stem-exclusion set: a word listed in the exclusion set must pass
 * through the analyzer unstemmed, while other words are still stemmed normally.
 */
public void testExclude() throws IOException {
  // case-sensitive exclusion set (ignoreCase = false)
  CharArraySet exclusionSet = new CharArraySet(asSet("edeltäjistään"), false);
  // try-with-resources: the analyzer is closed even if an assertion fails
  try (Analyzer a = new FinnishAnalyzer(FinnishAnalyzer.getDefaultStopSet(), exclusionSet)) {
    checkOneTerm(a, "edeltäjiinsä", "edeltäj");           // non-excluded word is stemmed
    checkOneTerm(a, "edeltäjistään", "edeltäjistään");    // excluded word kept verbatim
  }
}
Example usage of org.apache.lucene.analysis.CharArraySet from the Apache lucene-solr project: class TestWordDelimiterGraphFilter, method testRandomHugeStrings.
/** Blasts some enormous random strings through the analyzer to shake out crashes/inconsistencies. */
public void testRandomHugeStrings() throws Exception {
  int numIterations = atLeast(5);
  for (int i = 0; i < numIterations; i++) {
    // random flag combination; 512 = 2^9, presumably covering the filter's 9 flag bits
    final int flags = random().nextInt(512);
    final CharArraySet protectedWords;
    if (random().nextBoolean()) {
      // case-sensitive set of words the filter must leave untouched
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null; // exercise the no-protected-words path too
    }
    // try-with-resources so the analyzer is closed even when checkRandomData throws
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.WHITESPACE, false);
        TokenStream wdgf = new WordDelimiterGraphFilter(tokenizer, flags, protectedWords);
        return new TokenStreamComponents(tokenizer, wdgf);
      }
    }) {
      // TODO: properly support positionLengthAttribute
      checkRandomData(random(), a, 20 * RANDOM_MULTIPLIER, 8192, false, false);
    }
  }
}
Example usage of org.apache.lucene.analysis.CharArraySet from the Apache lucene-solr project: class TestWordDelimiterFilter, method testEmptyTerm.
/**
 * Exhaustively runs every flag combination (0..511) over the empty string and
 * checks analysis consistency for each.
 */
public void testEmptyTerm() throws IOException {
  Random random = random();
  for (int i = 0; i < 512; i++) {
    final int flags = i;
    final CharArraySet protectedWords;
    if (random.nextBoolean()) {
      // case-sensitive set of words the filter must leave untouched
      protectedWords = new CharArraySet(new HashSet<>(Arrays.asList("a", "b", "cd")), false);
    } else {
      protectedWords = null; // exercise the no-protected-words path too
    }
    // try-with-resources so the analyzer is closed even when the check throws
    try (Analyzer a = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new KeywordTokenizer();
        return new TokenStreamComponents(tokenizer, new WordDelimiterFilter(tokenizer, flags, protectedWords));
      }
    }) {
      // depending upon options, this thing may or may not preserve the empty term
      checkAnalysisConsistency(random, a, random.nextBoolean(), "");
    }
  }
}
Aggregations