Use of org.apache.lucene.analysis.synonym.SynonymFilter in the lucene-solr project by Apache.
From the class TestLimitTokenPositionFilter, method testMaxPosition3WithSynomyms.
/**
 * Runs "one two three four five" through a SynonymFilter followed by a
 * LimitTokenPositionFilter with a maximum position of 3, once with
 * consumeAllTokens enabled and once with it disabled, and asserts the emitted
 * terms and position increments.
 */
public void testMaxPosition3WithSynomyms() throws IOException {
  final boolean[] consumeAllModes = { true, false };
  for (int mode = 0; mode < consumeAllModes.length; mode++) {
    final boolean consumeAll = consumeAllModes[mode];

    MockTokenizer tokenizer = whitespaceMockTokenizer("one two three four five");
    // the tokenizer's checks are only enabled when all tokens are consumed
    tokenizer.setEnableChecks(consumeAll);

    SynonymMap.Builder builder = new SynonymMap.Builder(true);

    // single-word synonyms for "one"
    for (String synonym : new String[] { "first", "alpha", "beguine" }) {
      builder.add(new CharsRef("one"), new CharsRef(synonym), true);
    }

    // multi-word synonyms; the same scratch builder is refilled by each join() call
    CharsRefBuilder scratch = new CharsRefBuilder();
    SynonymMap.Builder.join(new String[] { "and", "indubitably", "single", "only" }, scratch);
    builder.add(new CharsRef("one"), scratch.get(), true);
    SynonymMap.Builder.join(new String[] { "dopple", "ganger" }, scratch);
    builder.add(new CharsRef("two"), scratch.get(), true);

    SynonymMap synonymMap = builder.build();
    TokenStream limited =
        new LimitTokenPositionFilter(new SynonymFilter(tokenizer, synonymMap, true), 3, consumeAll);

    // "only", the 4th word of the multi-word synonym "and indubitably single only",
    // is not emitted, since its position is greater than 3.
    final String[] expectedTerms = {
      "one", "first", "alpha", "beguine", "and",
      "two", "indubitably", "dopple",
      "three", "single", "ganger"
    };
    final int[] expectedPosIncrements = { 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0 };
    assertTokenStreamContents(limited, expectedTerms, expectedPosIncrements);
  }
}
Use of org.apache.lucene.analysis.synonym.SynonymFilter in the lucene-solr project by Apache.
From the class TestRemoveDuplicatesTokenFilter, method testRandomStrings.
/**
 * Blasts some random strings through the analyzer.
 *
 * <p>Each iteration builds a SynonymMap from random non-empty string pairs, wraps a
 * MockTokenizer in a SynonymFilter and a RemoveDuplicatesTokenFilter, and feeds the
 * resulting analyzer to checkRandomData.
 */
public void testRandomStrings() throws Exception {
  final int numIters = atLeast(10);
  for (int i = 0; i < numIters; i++) {
    // NOTE: the order of random() calls is kept as-is so a given seed produces the same run
    SynonymMap.Builder b = new SynonymMap.Builder(random().nextBoolean());
    final int numEntries = atLeast(10);
    for (int j = 0; j < numEntries; j++) {
      add(b, randomNonEmptyString(), randomNonEmptyString(), random().nextBoolean());
    }
    final SynonymMap map = b.build();
    final boolean ignoreCase = random().nextBoolean();

    // try-with-resources closes the analyzer even when checkRandomData throws,
    // instead of only on the success path
    try (Analyzer analyzer = new Analyzer() {
      @Override
      protected TokenStreamComponents createComponents(String fieldName) {
        Tokenizer tokenizer = new MockTokenizer(MockTokenizer.SIMPLE, true);
        TokenStream stream = new SynonymFilter(tokenizer, map, ignoreCase);
        return new TokenStreamComponents(tokenizer, new RemoveDuplicatesTokenFilter(stream));
      }
    }) {
      checkRandomData(random(), analyzer, 200);
    }
  }
}
Use of org.apache.lucene.analysis.synonym.SynonymFilter in the crate project by crate.
From the class SynonymTokenFilterFactory, method getChainAwareTokenFilterFactory.
/**
 * Creates a token filter factory that applies this factory's synonym rules.
 *
 * <p>The synonym map is built once, using an analyzer assembled from the given
 * tokenizer, char filters and preceding token filters. The returned factory reports
 * this factory's name and wraps streams in a case-sensitive SynonymFilter, or passes
 * them through untouched when the synonym map has no FST.
 *
 * @param tokenizer tokenizer factory of the analysis chain
 * @param charFilters char filter factories of the analysis chain
 * @param previousTokenFilters token filter factories that precede this one in the chain
 * @param allFilters lookup of token filter factories by name (not used by this implementation)
 * @return the synonym-applying token filter factory
 */
@Override
public TokenFilterFactory getChainAwareTokenFilterFactory(TokenizerFactory tokenizer, List<CharFilterFactory> charFilters, List<TokenFilterFactory> previousTokenFilters, Function<String, TokenFilterFactory> allFilters) {
  final SynonymMap synonymMap = buildSynonyms(
      buildSynonymAnalyzer(tokenizer, charFilters, previousTokenFilters),
      getRulesFromSettings(environment));
  // captured here so the anonymous factory reports the enclosing factory's name
  final String factoryName = name();

  return new TokenFilterFactory() {
    @Override
    public String name() {
      return factoryName;
    }

    @Override
    public TokenStream create(TokenStream tokenStream) {
      // no FST in the synonym map: hand the stream back unwrapped
      if (synonymMap.fst == null) {
        return tokenStream;
      }
      return new SynonymFilter(tokenStream, synonymMap, false);
    }
  };
}
Aggregations