use of org.apache.solr.analysis.TokenizerChain in project lucene-solr by apache.
the class SolrStopwordsCarrot2LexicalDataFactory method getSolrStopWordsForField.
/**
 * Returns the word sets cached for a field, computing them on the first
 * request for that field name.
 * <p>
 * When the field's index analyzer is a {@link TokenizerChain}, the list is
 * filled with the stop words of every {@link StopFilterFactory} and the
 * common words of every {@link CommonGramsFilterFactory} in the chain, in
 * chain order. For any other analyzer type the cached list is left empty.
 *
 * @param fieldName name of the schema field whose analysis chain is inspected
 * @return the list cached for {@code fieldName}
 */
private List<CharArraySet> getSolrStopWordsForField(String fieldName) {
  // Lookup and lazy population both happen while holding the cache's monitor.
  synchronized (solrStopWords) {
    if (solrStopWords.containsKey(fieldName)) {
      return solrStopWords.get(fieldName);
    }

    // Register the (initially empty) entry before touching the schema, so the
    // field is marked as visited from this point on.
    solrStopWords.put(fieldName, new ArrayList<>());
    final List<CharArraySet> wordSets = solrStopWords.get(fieldName);

    final IndexSchema latestSchema = core.getLatestSchema();
    final Analyzer indexAnalyzer = latestSchema.getFieldType(fieldName).getIndexAnalyzer();
    if (indexAnalyzer instanceof TokenizerChain) {
      final TokenizerChain chain = (TokenizerChain) indexAnalyzer;
      for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
        if (filterFactory instanceof StopFilterFactory) {
          // Stop filters expose their configured words as a CharArraySet.
          wordSets.add(((StopFilterFactory) filterFactory).getStopWords());
        }
        if (filterFactory instanceof CommonGramsFilterFactory) {
          // Common-grams words are collected alongside the stop words.
          wordSets.add(((CommonGramsFilterFactory) filterFactory).getCommonWords());
        }
      }
    }

    return solrStopWords.get(fieldName);
  }
}
use of org.apache.solr.analysis.TokenizerChain in project lucene-solr by apache.
the class SolrQueryParserBase method getReversedWildcardFilterFactory.
/**
 * Finds the {@link ReversedWildcardFilterFactory} in a field type's index
 * analysis chain, caching the answer per field type.
 * <p>
 * A {@code null} result is cached as well, so each field type is inspected
 * at most once.
 *
 * @param fieldType the field type whose index analyzer is examined
 * @return the first {@link ReversedWildcardFilterFactory} in the chain, or
 *         {@code null} when the index analyzer is not a {@link TokenizerChain}
 *         or contains no such factory
 */
protected ReversedWildcardFilterFactory getReversedWildcardFilterFactory(FieldType fieldType) {
  // The cache is created lazily on first use.
  if (leadingWildcards == null) {
    leadingWildcards = new HashMap<>();
  }

  ReversedWildcardFilterFactory cached = leadingWildcards.get(fieldType);
  // A present key with a null value means "already inspected, nothing found".
  boolean alreadyResolved = cached != null || leadingWildcards.containsKey(fieldType);
  if (alreadyResolved) {
    return cached;
  }

  ReversedWildcardFilterFactory found = null;
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (indexAnalyzer instanceof TokenizerChain) {
    // Walk the index-time filters looking for leading-wildcard support.
    for (TokenFilterFactory candidate : ((TokenizerChain) indexAnalyzer).getTokenFilterFactories()) {
      if (candidate instanceof ReversedWildcardFilterFactory) {
        found = (ReversedWildcardFilterFactory) candidate;
        break;
      }
    }
  }

  leadingWildcards.put(fieldType, found);
  return found;
}
use of org.apache.solr.analysis.TokenizerChain in project lucene-solr by apache.
the class FieldTypePluginLoader method constructMultiTermAnalyzer.
// Derives a multi-term analyzer from the given query analyzer:
//  - a null query analyzer yields null;
//  - anything that is not a TokenizerChain yields a plain KeywordAnalyzer;
//  - a TokenizerChain has its char filter factories, its tokenizer factory and its
//    token filter factories handed, in that order, to a MultiTermChainBuilder,
//    whose build() result is returned.
// NOTE(review): which of the supplied factories survive into the built analyzer is
// decided by MultiTermChainBuilder, which is not visible here - confirm there.
// NOTE(review): earlier commentary mentioned a legacyMultiTerm switch; no such flag
// is consulted in this method.
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
  if (queryAnalyzer == null) {
    return null;
  }
  if (!(queryAnalyzer instanceof TokenizerChain)) {
    return new KeywordAnalyzer();
  }

  TokenizerChain chain = (TokenizerChain) queryAnalyzer;
  MultiTermChainBuilder chainBuilder = new MultiTermChainBuilder();

  // Feed the components in analysis order: char filters, tokenizer, token filters.
  for (CharFilterFactory charFilter : chain.getCharFilterFactories()) {
    chainBuilder.add(charFilter);
  }
  chainBuilder.add(chain.getTokenizerFactory());
  for (TokenFilterFactory tokenFilter : chain.getTokenFilterFactories()) {
    chainBuilder.add(tokenFilter);
  }

  return chainBuilder.build();
}
use of org.apache.solr.analysis.TokenizerChain in project lucene-solr by apache.
the class PayloadUtils method getPayloadEncoder.
/**
 * Determines the payload encoder name implied by a field type's index
 * analysis chain.
 * <p>
 * The first payload-related filter factory in the chain decides the result:
 * a {@link DelimitedPayloadTokenFilterFactory} yields the value of its
 * {@code ENCODER_ATTR} original argument, a
 * {@link NumericPayloadTokenFilterFactory} yields {@code "float"}.
 *
 * @param fieldType the field type whose index analyzer is examined
 * @return the encoder name, or {@code null} when the index analyzer is not a
 *         {@link TokenizerChain}, when no payload filter factory is present,
 *         or when the delimited-payload factory has no encoder argument
 */
public static String getPayloadEncoder(FieldType fieldType) {
  // TODO: support custom payload encoding fields too somehow - maybe someone has a custom component that encodes payloads as floats
  Analyzer indexAnalyzer = fieldType.getIndexAnalyzer();
  if (!(indexAnalyzer instanceof TokenizerChain)) {
    return null;
  }

  // Scan the index-time filters; the first payload-aware factory wins.
  TokenizerChain chain = (TokenizerChain) indexAnalyzer;
  for (TokenFilterFactory filterFactory : chain.getTokenFilterFactories()) {
    if (filterFactory instanceof DelimitedPayloadTokenFilterFactory) {
      return filterFactory.getOriginalArgs().get(DelimitedPayloadTokenFilterFactory.ENCODER_ATTR);
    }
    if (filterFactory instanceof NumericPayloadTokenFilterFactory) {
      // encodes using `PayloadHelper.encodeFloat(payload)`
      return "float";
    }
  }
  return null;
}
Aggregations