Use of org.apache.solr.analysis.TokenizerChain in the lucene-solr project by Apache.
From the class SolrSuggester, method init.
/**
 * Uses the <code>config</code> and the <code>core</code> to initialize the underlying
 * Lucene suggester.
 *
 * @param config suggester configuration: name, source location, lookup/dictionary
 *               implementation class names and an optional store directory
 * @param core   the owning SolrCore, used for resource loading, resolving the
 *               data directory and registering close hooks
 * @return the (possibly defaulted) name of this suggester
 */
public String init(NamedList<?> config, SolrCore core) {
  LOG.info("init: " + config);

  // read the config
  name = config.get(NAME) != null ? (String) config.get(NAME) : DEFAULT_DICT_NAME;
  sourceLocation = (String) config.get(LOCATION);
  lookupImpl = (String) config.get(LOOKUP_IMPL);
  dictionaryImpl = (String) config.get(DICTIONARY_IMPL);
  String store = (String) config.get(STORE_DIR);
  if (lookupImpl == null) {
    lookupImpl = LookupFactory.DEFAULT_FILE_BASED_DICT;
    LOG.info("No " + LOOKUP_IMPL + " parameter was provided falling back to " + lookupImpl);
  }

  // Collections.emptyMap() is the type-safe replacement for the raw Collections.EMPTY_MAP
  contextFilterQueryAnalyzer = new TokenizerChain(new StandardTokenizerFactory(Collections.emptyMap()), null);

  // initialize appropriate lookup instance
  factory = core.getResourceLoader().newInstance(lookupImpl, LookupFactory.class);
  lookup = factory.create(config, core);

  // instanceof is false for null, so a separate null check is redundant
  if (lookup instanceof Closeable) {
    core.addCloseHook(new CloseHook() {
      @Override
      public void preClose(SolrCore core) {
        try {
          ((Closeable) lookup).close();
        } catch (IOException e) {
          LOG.warn("Could not close the suggester lookup.", e);
        }
      }

      @Override
      public void postClose(SolrCore core) {
      }
    });
  }

  // if store directory is provided make it or load up the lookup with its content
  if (store != null && !store.isEmpty()) {
    storeDir = new File(store);
    if (!storeDir.isAbsolute()) {
      // relative store paths are resolved against the core's data directory
      storeDir = new File(core.getDataDir() + File.separator + storeDir);
    }
    if (!storeDir.exists()) {
      // surface a failed directory creation instead of silently dropping it
      if (!storeDir.mkdirs()) {
        LOG.warn("Could not create suggester store directory " + storeDir);
      }
    } else if (getStoreFile().exists()) {
      if (LOG.isDebugEnabled()) {
        LOG.debug("attempt reload of the stored lookup from file " + getStoreFile());
      }
      // try-with-resources guarantees the stream is closed even when load() throws;
      // previously the FileInputStream leaked on failure
      try (FileInputStream is = new FileInputStream(getStoreFile())) {
        lookup.load(is);
      } catch (IOException e) {
        // a missing/stale store is expected on first use; include the cause for diagnosis
        LOG.warn("Loading stored lookup data failed, possibly not cached yet", e);
      }
    }
  }

  // dictionary configuration
  if (dictionaryImpl == null) {
    dictionaryImpl = (sourceLocation == null) ? DictionaryFactory.DEFAULT_INDEX_BASED_DICT : DictionaryFactory.DEFAULT_FILE_BASED_DICT;
    LOG.info("No " + DICTIONARY_IMPL + " parameter was provided falling back to " + dictionaryImpl);
  }
  dictionaryFactory = core.getResourceLoader().newInstance(dictionaryImpl, DictionaryFactory.class);
  dictionaryFactory.setParams(config);
  LOG.info("Dictionary loaded with params: " + config);

  return name;
}
Use of org.apache.solr.analysis.TokenizerChain in the lucene-solr project by Apache.
From the class MultiTermTest, method testMultiFound.
@Test
public void testMultiFound() {
  SchemaField field = h.getCore().getLatestSchema().getField("content_multi");

  // The multi-term analyzer must be a whitespace-tokenized chain whose filters
  // are limited to ASCII folding and lower-casing.
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);
  // cast once and reuse instead of re-casting for every assertion
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
  }

  // The index analyzer keeps the whitespace tokenizer, allows ASCII folding / trim
  // filters, and must declare no char filters.
  analyzer = field.getType().getIndexAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);
  tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof WhitespaceTokenizerFactory);
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof TrimFilterFactory));
  }
  // assertEquals gives an expected-vs-actual failure message, unlike assertTrue(x == 0)
  assertEquals(0, tc.getCharFilterFactories().length);
}
Use of org.apache.solr.analysis.TokenizerChain in the lucene-solr project by Apache.
From the class MultiTermTest, method testDefaultCopiedToMulti.
@Test
public void testDefaultCopiedToMulti() {
  SchemaField field = h.getCore().getLatestSchema().getField("content_ws");

  // With no explicit multi-term analyzer configured, the derived default must use
  // a keyword tokenizer and only multi-term-safe filters (ASCII folding, lower-case).
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);
  // cast once and reuse instead of re-casting for every assertion
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue((factory instanceof ASCIIFoldingFilterFactory) || (factory instanceof LowerCaseFilterFactory));
  }
  // assertEquals gives an expected-vs-actual failure message, unlike assertTrue(x == 0)
  assertEquals(0, tc.getCharFilterFactories().length);
}
Use of org.apache.solr.analysis.TokenizerChain in the lucene-solr project by Apache.
From the class FieldAnalysisRequestHandlerTest, method testCustomAttribute.
// See SOLR-8460: custom token attributes added by analysis components must
// survive serialization in the FieldAnalysisRequestHandler response.
@Test
public void testCustomAttribute() throws Exception {
// Minimal analysis request against an existing field type; match highlighting
// is irrelevant for this test, so it is switched off.
FieldAnalysisRequest request = new FieldAnalysisRequest();
request.addFieldType("skutype1");
request.setFieldValue("hi, 3456-12 a Test");
request.setShowMatch(false);
// Wire an ad-hoc chain: CustomTokenizer feeding CustomTokenFilter. Both are
// presumably defined elsewhere in this test class and set a custom
// FlagsAttribute implementation on the token stream — see the 900/1 math below.
FieldType fieldType = new TextField();
Analyzer analyzer = new TokenizerChain(new TokenizerFactory(Collections.emptyMap()) {
@Override
public Tokenizer create(AttributeFactory factory) {
return new CustomTokenizer(factory);
}
}, new TokenFilterFactory[] { new TokenFilterFactory(Collections.emptyMap()) {
@Override
public TokenStream create(TokenStream input) {
return new CustomTokenFilter(input);
}
} });
fieldType.setIndexAnalyzer(analyzer);
// Run the handler directly against the custom chain; the field name is unused
// because the field type is passed explicitly.
NamedList<NamedList> result = handler.analyzeValues(request, fieldType, "fieldNameUnused");
// just test that we see "900" in the flags attribute here
List<NamedList> tokenInfoList = (List<NamedList>) result.findRecursive("index", CustomTokenFilter.class.getName());
// '1' from CustomTokenFilter plus 900 from CustomFlagsAttributeImpl.
assertEquals(901, tokenInfoList.get(0).get("org.apache.lucene.analysis.tokenattributes.FlagsAttribute#flags"));
}
Use of org.apache.solr.analysis.TokenizerChain in the lucene-solr project by Apache.
From the class MultiTermTest, method testQueryCopiedToMulti.
@Test
public void testQueryCopiedToMulti() {
  SchemaField field = h.getCore().getLatestSchema().getField("content_charfilter");

  // The multi-term analyzer derived from the query analyzer must keep the keyword
  // tokenizer, the lower-case filter, and exactly one mapping char filter.
  Analyzer analyzer = ((TextField) field.getType()).getMultiTermAnalyzer();
  assertTrue(analyzer instanceof TokenizerChain);
  // cast once and reuse instead of re-casting for every assertion
  TokenizerChain tc = (TokenizerChain) analyzer;
  assertTrue(tc.getTokenizerFactory() instanceof KeywordTokenizerFactory);
  for (TokenFilterFactory factory : tc.getTokenFilterFactories()) {
    assertTrue(factory instanceof LowerCaseFilterFactory);
  }
  // assertEquals gives an expected-vs-actual failure message, unlike assertTrue(x == 1)
  assertEquals(1, tc.getCharFilterFactories().length);
  assertTrue(tc.getCharFilterFactories()[0] instanceof MappingCharFilterFactory);
}
Aggregations