use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.
the class TestFactories method doTestCharFilter.
/**
 * Sanity-checks the char filter factory registered under the given name.
 *
 * <p>The factory class is looked up via {@link CharFilterFactory#lookupClass(String)} and
 * instantiated through {@code initialize}. If {@code initialize} yields {@code null} there is
 * nothing to check and the method returns silently.
 *
 * @param charfilter name used to look up the char filter factory class
 * @throws IOException declared for the factory initialization / random-data check calls
 */
private void doTestCharFilter(String charfilter) throws IOException {
  Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
  CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
  if (factory == null) {
    return;
  }

  // if it implements MultiTermAware, sanity check its impl
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(mtc);
    // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
    assertTrue(mtc instanceof CharFilterFactory);
  }

  // beast it just a little, it shouldn't throw exceptions:
  // (it should have thrown them in initialize)
  // try-with-resources guarantees the analyzer is closed even when the
  // random-data check fails with an exception or assertion error.
  try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory)) {
    checkRandomData(random(), a, 20, 20, false, false);
  }
}
use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.
the class FieldTypePluginLoader method constructMultiTermAnalyzer.
// The point here is that, if no multiterm analyzer was specified in the schema file, do one of several things:
// 1> If legacyMultiTerm == false, assemble a new analyzer composed of all of the charfilters,
// lowercase filters and asciifoldingfilter.
// 2> If legacyMultiTerm == true just construct the analyzer from a KeywordTokenizer. That should mimic current behavior.
// Do the same if they've specified that the old behavior is required (legacyMultiTerm="true")
/**
 * Derives a multi-term analyzer from the given query analyzer.
 *
 * <ul>
 *   <li>{@code null} input yields {@code null}.</li>
 *   <li>Anything that is not a {@code TokenizerChain} yields a fresh {@code KeywordAnalyzer}.</li>
 *   <li>A {@code TokenizerChain} has its char filter factories, tokenizer factory and token
 *       filter factories fed, in that order, into a {@code MultiTermChainBuilder}, whose
 *       {@code build()} result is returned.</li>
 * </ul>
 *
 * @param queryAnalyzer the query-time analyzer to derive from; may be {@code null}
 * @return the derived analyzer, or {@code null} if {@code queryAnalyzer} is {@code null}
 */
private Analyzer constructMultiTermAnalyzer(Analyzer queryAnalyzer) {
  if (queryAnalyzer == null) {
    return null;
  }

  if (queryAnalyzer instanceof TokenizerChain) {
    final TokenizerChain chain = (TokenizerChain) queryAnalyzer;
    final MultiTermChainBuilder multiTermBuilder = new MultiTermChainBuilder();

    // Components are offered to the builder in analysis order:
    // char filters, then the tokenizer, then the token filters.
    for (CharFilterFactory charFilter : chain.getCharFilterFactories()) {
      multiTermBuilder.add(charFilter);
    }
    multiTermBuilder.add(chain.getTokenizerFactory());
    for (TokenFilterFactory tokenFilter : chain.getTokenFilterFactories()) {
      multiTermBuilder.add(tokenFilter);
    }
    return multiTermBuilder.build();
  }

  // Not a chain we can take apart: fall back to a keyword analyzer.
  return new KeywordAnalyzer();
}
use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.
the class AnalyzerFactory method toString.
/**
 * Renders this factory as {@code AnalyzerFactory(...)}.
 *
 * <p>The optional {@code name}, {@code positionIncrementGap} and {@code offsetGap} are emitted
 * as {@code key:value} pairs only when non-null, followed by every char filter factory, the
 * tokenizer factory, and every token filter factory, all separated by {@code ", "}.
 */
@Override
public String toString() {
  final StringBuilder out = new StringBuilder("AnalyzerFactory(");

  if (name != null) {
    out.append("name:").append(name).append(", ");
  }
  if (positionIncrementGap != null) {
    out.append("positionIncrementGap:").append(positionIncrementGap).append(", ");
  }
  if (offsetGap != null) {
    out.append("offsetGap:").append(offsetGap).append(", ");
  }

  // Char filters come before the tokenizer, so each one carries a trailing separator.
  for (CharFilterFactory cff : charFilterFactories) {
    out.append(cff).append(", ");
  }

  out.append(tokenizerFactory);

  // Token filters come after the tokenizer, so each one carries a leading separator.
  for (TokenFilterFactory tff : tokenFilterFactories) {
    out.append(", ").append(tff);
  }

  return out.append(')').toString();
}
use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.
the class TestAllAnalyzersHaveFactories method test.
/**
 * Walks every class found under {@code org.apache.lucene.analysis} and, for each public concrete
 * Tokenizer / TokenFilter / CharFilter that is not explicitly excluded, asserts that a factory
 * can be looked up under the class's simple name with its type suffix stripped.
 *
 * <p>Where the factory can be instantiated with only {@code luceneMatchVersion} set, the class
 * of the component it creates is also compared against the class under test.
 */
public void test() throws Exception {
  final List<Class<?>> candidates = TestRandomChains.getClassesForPackage("org.apache.lucene.analysis");
  for (final Class<?> c : candidates) {
    if (isExcludedFromFactoryCheck(c)) {
      continue;
    }

    final Map<String, String> args = new HashMap<>();
    args.put("luceneMatchVersion", Version.LATEST.toString());

    if (Tokenizer.class.isAssignableFrom(c)) {
      verifyTokenizerHasFactory(c, args);
    } else if (TokenFilter.class.isAssignableFrom(c)) {
      verifyTokenFilterHasFactory(c, args);
    } else if (CharFilter.class.isAssignableFrom(c)) {
      verifyCharFilterHasFactory(c, args);
    }
  }
}

/** Returns true when the class should not be checked for a matching factory. */
private boolean isExcludedFromFactoryCheck(Class<?> c) {
  final int mods = c.getModifiers();
  // don't waste time with abstract classes
  if (Modifier.isAbstract(mods) || !Modifier.isPublic(mods)) {
    return true;
  }
  if (c.isSynthetic() || c.isAnonymousClass() || c.isMemberClass() || c.isInterface()) {
    return true;
  }
  if (testComponents.contains(c)
      || crazyComponents.contains(c)
      || oddlyNamedComponents.contains(c)
      || tokenFiltersWithoutFactory.contains(c)) {
    return true;
  }
  // deprecated ones are typically back compat hacks
  if (c.isAnnotationPresent(Deprecated.class)) {
    return true;
  }
  final boolean isAnalysisComponent = Tokenizer.class.isAssignableFrom(c)
      || TokenFilter.class.isAssignableFrom(c)
      || CharFilter.class.isAssignableFrom(c);
  return !isAnalysisComponent;
}

/** Asserts that a TokenizerFactory exists for the tokenizer class and creates that class. */
private void verifyTokenizerHasFactory(Class<?> c, Map<String, String> args) throws Exception {
  final String className = c.getSimpleName();
  assertTrue(className.endsWith("Tokenizer"));
  final String spiName = className.substring(0, className.length() - "Tokenizer".length());
  assertNotNull(TokenizerFactory.lookupClass(spiName));
  try {
    final TokenizerFactory tokenizerFactory = TokenizerFactory.forName(spiName, args);
    assertNotNull(tokenizerFactory);
    if (tokenizerFactory instanceof ResourceLoaderAware) {
      ((ResourceLoaderAware) tokenizerFactory).inform(loader);
    }
    assertSame(c, tokenizerFactory.create().getClass());
  } catch (IllegalArgumentException ignored) {
    // TODO: For now pass because some factories have not yet a default config that always works
  }
}

/** Asserts that a TokenFilterFactory exists for the token filter class and creates that class. */
private void verifyTokenFilterHasFactory(Class<?> c, Map<String, String> args) throws Exception {
  final String className = c.getSimpleName();
  assertTrue(className.endsWith("Filter"));
  // Strip the longer "TokenFilter" suffix when present, otherwise just "Filter".
  final int suffixLength = className.endsWith("TokenFilter") ? "TokenFilter".length() : "Filter".length();
  final String spiName = className.substring(0, className.length() - suffixLength);
  assertNotNull(TokenFilterFactory.lookupClass(spiName));
  try {
    final TokenFilterFactory filterFactory = TokenFilterFactory.forName(spiName, args);
    assertNotNull(filterFactory);
    if (filterFactory instanceof ResourceLoaderAware) {
      ((ResourceLoaderAware) filterFactory).inform(loader);
    }
    final Class<? extends TokenStream> producedClass = filterFactory.create(new KeywordTokenizer()).getClass();
    // only check instance if factory have wrapped at all!
    if (producedClass != KeywordTokenizer.class) {
      assertSame(c, producedClass);
    }
  } catch (IllegalArgumentException ignored) {
    // TODO: For now pass because some factories have not yet a default config that always works
  }
}

/** Asserts that a CharFilterFactory exists for the char filter class and creates that class. */
private void verifyCharFilterHasFactory(Class<?> c, Map<String, String> args) throws Exception {
  final String className = c.getSimpleName();
  assertTrue(className.endsWith("CharFilter"));
  final String spiName = className.substring(0, className.length() - "CharFilter".length());
  assertNotNull(CharFilterFactory.lookupClass(spiName));
  try {
    final CharFilterFactory charFilterFactory = CharFilterFactory.forName(spiName, args);
    assertNotNull(charFilterFactory);
    if (charFilterFactory instanceof ResourceLoaderAware) {
      ((ResourceLoaderAware) charFilterFactory).inform(loader);
    }
    final Class<? extends Reader> producedClass = charFilterFactory.create(new StringReader("")).getClass();
    // only check instance if factory have wrapped at all!
    if (producedClass != StringReader.class) {
      assertSame(c, producedClass);
    }
  } catch (IllegalArgumentException ignored) {
    // TODO: For now pass because some factories have not yet a default config that always works
  }
}
use of org.apache.lucene.analysis.util.CharFilterFactory in project lucene-solr by apache.
the class TestFactories method doTestCharFilter.
/**
 * Sanity-checks the char filter factory registered under the given name.
 *
 * <p>The factory class is looked up via {@link CharFilterFactory#lookupClass(String)} and
 * instantiated through {@code initialize}. If {@code initialize} yields {@code null} there is
 * nothing to check and the method returns silently.
 *
 * @param charfilter name used to look up the char filter factory class
 * @throws IOException declared for the factory initialization / random-data check calls
 */
private void doTestCharFilter(String charfilter) throws IOException {
  Class<? extends CharFilterFactory> factoryClazz = CharFilterFactory.lookupClass(charfilter);
  CharFilterFactory factory = (CharFilterFactory) initialize(factoryClazz);
  if (factory == null) {
    return;
  }

  // if it implements MultiTermAware, sanity check its impl
  if (factory instanceof MultiTermAwareComponent) {
    AbstractAnalysisFactory mtc = ((MultiTermAwareComponent) factory).getMultiTermComponent();
    assertNotNull(mtc);
    // it's not ok to return a tokenizer or tokenfilter here, this makes no sense
    assertTrue(mtc instanceof CharFilterFactory);
  }

  // beast it just a little, it shouldn't throw exceptions:
  // (it should have thrown them in initialize)
  // try-with-resources guarantees the analyzer is closed even when the
  // random-data check fails with an exception or assertion error.
  try (Analyzer a = new FactoryAnalyzer(assertingTokenizer, null, factory)) {
    checkRandomData(random(), a, 20, 20, false, false);
  }
}
Aggregations