use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class FieldAnalysisRequestHandlerTest method testCustomAttribute.
//See SOLR-8460
@Test
public void testCustomAttribute() throws Exception {
FieldAnalysisRequest request = new FieldAnalysisRequest();
request.addFieldType("skutype1");
request.setFieldValue("hi, 3456-12 a Test");
request.setShowMatch(false);
FieldType fieldType = new TextField();
Analyzer analyzer = new TokenizerChain(new TokenizerFactory(Collections.emptyMap()) {
@Override
public Tokenizer create(AttributeFactory factory) {
return new CustomTokenizer(factory);
}
}, new TokenFilterFactory[] { new TokenFilterFactory(Collections.emptyMap()) {
@Override
public TokenStream create(TokenStream input) {
return new CustomTokenFilter(input);
}
} });
fieldType.setIndexAnalyzer(analyzer);
NamedList<NamedList> result = handler.analyzeValues(request, fieldType, "fieldNameUnused");
// just test that we see "900" in the flags attribute here
List<NamedList> tokenInfoList = (List<NamedList>) result.findRecursive("index", CustomTokenFilter.class.getName());
// '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl.
assertEquals(901, tokenInfoList.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags"));
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class MultiTermTest method testQueryCopiedToMulti.
@Test
public void testQueryCopiedToMulti() {
SchemaField field = h.getCore().getLatestSchema().getField("content_charfilter");
Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
assertTrue(analyzer instanceof TokenizerChain);
assertTrue(((TokenizerChain) analyzer).getTokenizerFactory() instanceof KeywordTokenizerFactory);
TokenizerChain tc = (TokenizerChain) analyzer;
for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
assertTrue(factory instanceof LowerCaseFilterFactory);
}
assertTrue(tc.getCharFilterFactories().length == 1);
assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class SolrStopwordsCarrot2LexicalDataFactory method getSolrStopWordsForField.
/**
* Obtains stop words for a field from the associated
* {@link StopFilterFactory}, if any.
*/
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
// of this class are not used by multiple threads at a time.
synchronized (solrStopWords) {
if (!solrStopWords.containsKey(fieldName)) {
solrStopWords.put(fieldName, new ArrayList<>());
IndexSchema schema = core.getLatestSchema();
final Analyzer fieldAnalyzer = schema.getFieldType(fieldName).getIndexAnalyzer();
if (fieldAnalyzer instanceof TokenizerChain) {
final TokenFilterFactory[] filterFactories = ((TokenizerChain) fieldAnalyzer).getTokenFilterFactories();
for (TokenFilterFactory factory : filterFactories) {
if (factory instanceof StopFilterFactory) {
// StopFilterFactory holds the stop words in a CharArraySet
CharArraySet stopWords = ((StopFilterFactory) factory).getStopWords();
solrStopWords.get(fieldName).add(stopWords);
}
if (factory instanceof CommonGramsFilterFactory) {
CharArraySet commonWords = ((CommonGramsFilterFactory) factory).getCommonWords();
solrStopWords.get(fieldName).add(commonWords);
}
}
}
}
return solrStopWords.get(fieldName);
}
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class SolrQueryParserBase method getReversedWildcardFilterFactory.
protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
if (leadingWildcards == null)
leadingWildcards = new HashMap<>();
ReversedWildcardFilterFactory fac = leadingWildcards.get(fieldType);
if (fac != null || leadingWildcards.containsKey(fieldType)) {
return fac;
}
Analyzer a = fieldType.getIndexAnalyzer();
if (a instanceof TokenizerChain) {
// examine the indexing analysis chain if it supports leading wildcards
TokenizerChain tc = (TokenizerChain) a;
TokenFilterFactory[] factories = tc.getTokenFilterFactories();
for (TokenFilterFactory factory : factories) {
if (factory instanceof ReversedWildcardFilterFactory) {
fac = (ReversedWildcardFilterFactory) factory;
break;
}
}
}
leadingWildcards.put(fieldType, fac);
return fac;
}
use of org.apache.lucene.analysis.util.TokenFilterFactory in project lucene-solr by apache.
the class FieldTypePluginLoader method constructMultiTermAnalyzer.
// The point here is that, if no multiterm analyzer was specified in the schema file, do one of several things:
// 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters,
// lowercase filters and asciifoldingfilter.
// 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
// Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
if (queryAnalyzer == null)
return null;
if (!(queryAnalyzer instanceof TokenizerChain)) {
return new KeywordAnalyzer();
}
TokenizerChain tc = (TokenizerChain) queryAnalyzer;
MultiTermChainBuilder builder = new MultiTermChainBuilder();
CharFilterFactory[] charFactories = tc.getCharFilterFactories();
for (CharFilterFactory fact : charFactories) {
builder.add(fact);
}
builder.add(tc.getTokenizerFactory());
for (TokenFilterFactory fact : tc.getTokenFilterFactories()) {
builder.add(fact);
}
return builder.build();
}
Aggregations