use of org.apache.lucene.analysis.core.StopFilterFactory in project jackrabbit-oak by apache.
the class NodeStateAnalyzerFactoryTest method analyzerByComposition_FileResource.
/**
 * Builds an analyzer definition consisting of a "whitespace" tokenizer and a
 * single filter node named "stop" whose "words" property references two file
 * nodes, then checks that the created analyzer exposes exactly one
 * {@link StopFilterFactory} containing a word from each of the two files.
 */
@Test
public void analyzerByComposition_FileResource() throws Exception {
    NodeBuilder definition = EMPTY_NODE.builder();
    definition.child(ANL_TOKENIZER).setProperty(ANL_NAME, "whitespace");

    // The filter node carries no explicit name property; the node name
    // ("stop") is what identifies the filter.
    NodeBuilder stopFilterNode = definition.child(ANL_FILTERS).child("stop");
    stopFilterNode.setProperty("words", "set1.txt, set2.txt");

    // Two word-list files stored as children of the filter node.
    createFileNode(stopFilterNode, "set1.txt", newCharArraySet("foo", "bar"));
    createFileNode(stopFilterNode, "set2.txt", newCharArraySet("foo1", "bar1"));

    TokenizerChain chain = (TokenizerChain) factory.createInstance(definition.getNodeState());

    // Exactly one filter is expected, and it must be the stop filter.
    assertEquals(1, chain.getFilters().length);
    assertEquals(StopFilterFactory.class.getName(), chain.getFilters()[0].getClassArg());

    // Words from both referenced files must end up in the stop word set.
    StopFilterFactory stopFactory = (StopFilterFactory) chain.getFilters()[0];
    assertTrue(stopFactory.getStopWords().contains("foo"));
    assertTrue(stopFactory.getStopWords().contains("foo1"));
}
use of org.apache.lucene.analysis.core.StopFilterFactory in project lucene-solr by apache.
the class SolrStopwordsCarrot2LexicalDataFactory method getSolrStopWordsForField.
/**
 * Returns the word sets collected for the given field from its index
 * analyzer: the stop words of every {@link StopFilterFactory} and the common
 * words of every {@link CommonGramsFilterFactory} found in the analyzer's
 * filter chain. The result is computed on first request per field and served
 * from {@code solrStopWords} afterwards.
 *
 * @param fieldName name of the schema field to look up
 * @return the word sets recorded for the field; empty when the index analyzer
 *         is not a {@link TokenizerChain} or contains no matching filters
 */
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
    // NOTE(review): the original comment at this point was cut off; its
    // remaining text suggested instances of this class are not used by
    // multiple threads at a time. The lock on the cache is kept regardless.
    synchronized (solrStopWords) {
        if (solrStopWords.containsKey(fieldName)) {
            return solrStopWords.get(fieldName);
        }

        // Register the (initially empty) entry before inspecting the schema,
        // so the field counts as processed from this point on.
        solrStopWords.put(fieldName, new ArrayList<>());
        final List<CharArraySet> collected = solrStopWords.get(fieldName);

        final IndexSchema latestSchema = core.getLatestSchema();
        final Analyzer indexAnalyzer = latestSchema.getFieldType(fieldName).getIndexAnalyzer();
        if (!(indexAnalyzer instanceof TokenizerChain)) {
            // Only tokenizer chains expose their filter factories.
            return solrStopWords.get(fieldName);
        }

        final TokenizerChain chain = (TokenizerChain) indexAnalyzer;
        for (TokenFilterFactory candidate : chain.getTokenFilterFactories()) {
            // The two checks are independent on purpose; each matching
            // factory type contributes its own word set.
            if (candidate instanceof StopFilterFactory) {
                final StopFilterFactory stopFactory = (StopFilterFactory) candidate;
                collected.add(stopFactory.getStopWords());
            }
            if (candidate instanceof CommonGramsFilterFactory) {
                final CommonGramsFilterFactory commonGramsFactory = (CommonGramsFilterFactory) candidate;
                collected.add(commonGramsFactory.getCommonWords());
            }
        }
        return solrStopWords.get(fieldName);
    }
}
Aggregations