Example use of org.apache.lucene.analysis.util.ResourceLoader in the Apache lucene-solr project.
From the class TestSuggestStopFilterFactory, method testInform.
/**
 * Exercises the factory with a single words file, a comma-separated pair of words files,
 * a snowball-format words file and finally with no arguments at all, checking the
 * resulting stop-word set and the ignoreCase flag each time.
 */
public void testInform() throws Exception {
  final ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", resourceLoader != null);

  // words=stop-1.txt
  final int singleFileSize = 2;
  SuggestStopFilterFactory stopFactory = createFactory("words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet stopWords = stopFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", stopWords != null);
  assertTrue("words Size: " + stopWords.size() + " is not: " + singleFileSize, stopWords.size() == singleFileSize);
  assertTrue(stopFactory.isIgnoreCase() + " does not equal: " + true, stopFactory.isIgnoreCase());

  // words=stop-1.txt, stop-2.txt
  final int twoFilesSize = 4;
  stopFactory = createFactory("words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  stopWords = stopFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", stopWords != null);
  assertTrue("words Size: " + stopWords.size() + " is not: " + twoFilesSize, stopWords.size() == twoFilesSize);
  assertTrue(stopFactory.isIgnoreCase() + " does not equal: " + true, stopFactory.isIgnoreCase());

  // words=stop-snowball.txt, format=snowball
  stopFactory = createFactory("words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  stopWords = stopFactory.getStopWords();
  assertEquals(8, stopWords.size());
  final String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String expectedWord : expectedWords) {
    assertTrue(stopWords.contains(expectedWord));
  }

  // defaults
  stopFactory = createFactory();
  assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.getStopWords());
  assertEquals(false, stopFactory.isIgnoreCase());
}
Example use of org.apache.lucene.analysis.util.ResourceLoader in the Apache lucene-solr project.
From the class ManagedSynonymGraphFilterFactory, method onManagedResourceInitialized.
/**
 * Called once, during core initialization, to set up the analysis component that
 * depends on the data managed by this resource. Initializing the component only once
 * keeps text analysis consistent, especially in a distributed environment, where one
 * server must not apply a different set of synonym mappings than the other servers.
 *
 * <p>The given init args are extended in place with the "synonyms", "expand" and
 * "format" settings, converted to a String-to-String map, and used to construct the
 * delegate {@link SynonymGraphFilterFactory}, which is then informed with the
 * resource loader of {@code res}.
 *
 * @param initArgs the init args; cast to {@code NamedList<Object>} and appended to
 * @param res the managed resource, cast to {@code SynonymManager} when synonyms are loaded
 * @throws SolrException with {@code SERVER_ERROR} if informing the delegate fails with an IOException
 */
@SuppressWarnings("unchecked")
@Override
public void onManagedResourceInitialized(NamedList<?> initArgs, final ManagedResource res) throws SolrException {
  final NamedList<Object> mutableArgs = (NamedList<Object>) initArgs;
  mutableArgs.add("synonyms", getResourceId());
  mutableArgs.add("expand", "false");
  mutableArgs.add("format", "solr");

  // the delegate's constructor takes String values, so stringify every arg
  final Map<String, String> delegateArgs = new HashMap<>();
  for (Map.Entry<String, ?> arg : mutableArgs) {
    final String argName = arg.getKey();
    final Object argValue = arg.getValue();
    delegateArgs.put(argName, argValue.toString());
  }

  // the delegate pulls its synonym mappings from the managed resource
  // through a custom parser instead of reading them from a file
  delegate = new SynonymGraphFilterFactory(delegateArgs) {
    @Override
    protected SynonymMap loadSynonyms(ResourceLoader loader, String cname, boolean dedup, Analyzer analyzer) throws IOException, ParseException {
      final SynonymManager synonymManager = (SynonymManager) res;
      final ManagedSynonymParser managedParser = new ManagedSynonymParser(synonymManager, dedup, analyzer);
      // passing null is safe: no parsing is done against an input Reader
      managedParser.parse(null);
      return managedParser.build();
    }
  };

  try {
    delegate.inform(res.getResourceLoader());
  } catch (IOException ioe) {
    throw new SolrException(ErrorCode.SERVER_ERROR, ioe);
  }
}
Example use of org.apache.lucene.analysis.util.ResourceLoader in the Apache lucene-solr project.
From the class XPathEntityProcessor, method initXpathReader.
/**
 * Initializes this processor's XPath reading state from the entity's attributes.
 *
 * <p>In order, this method:
 * <ol>
 *   <li>reads the {@code USE_SOLR_ADD_SCHEMA} and {@code STREAM} flags and the optional
 *       "batchSize" / "readTimeOut" attributes;</li>
 *   <li>if an {@code XSL} attribute is present, builds {@code xslTransformer} from it;</li>
 *   <li>creates {@code xpathReader}, either for the fixed "/add/doc" layout or from the
 *       {@code FOR_EACH} attribute plus every entity field that declares an {@code XPATH};</li>
 *   <li>collects the variables of the {@code URL} attribute that start with
 *       {@code entityName + "."} into {@code placeHolderVariables};</li>
 *   <li>collects the columns of fields flagged {@code COMMON_FIELD="true"} into
 *       {@code commonFields}.</li>
 * </ol>
 *
 * <p>{@code reinitXPathReader} is reset to {@code false} on entry and set to {@code true}
 * when the resolved forEach expression equals the raw attribute, or when a field's xpath
 * changes under token replacement and this is not the root entity.
 *
 * @param resolver used to extract the variables referenced by the {@code URL} attribute
 * @throws DataImportHandlerException (SEVERE) if the XSL cannot be initialized, if the
 *         'forEach' attribute is missing, or if registering the field xpaths fails
 * @throws NumberFormatException if "batchSize" or "readTimeOut" does not resolve to an integer
 */
private void initXpathReader(VariableResolver resolver) {
  reinitXPathReader = false;
  useSolrAddXml = Boolean.parseBoolean(context.getEntityAttribute(USE_SOLR_ADD_SCHEMA));
  streamRows = Boolean.parseBoolean(context.getEntityAttribute(STREAM));

  // Parse the same resolved value that the null check inspects. Parsing the raw
  // attribute instead would fail on values that are given as a template.
  final String batchSize = context.getResolvedEntityAttribute("batchSize");
  if (batchSize != null) {
    blockingQueueSize = Integer.parseInt(batchSize);
  }
  final String readTimeOut = context.getResolvedEntityAttribute("readTimeOut");
  if (readTimeOut != null) {
    blockingQueueTimeOut = Integer.parseInt(readTimeOut);
  }

  String xslt = context.getEntityAttribute(XSL);
  if (xslt != null) {
    xslt = context.replaceTokens(xslt);
    try {
      // create an instance of TransformerFactory
      TransformerFactory transFact = TransformerFactory.newInstance();
      final SolrCore core = context.getSolrCore();
      final StreamSource xsltSource;
      if (core != null) {
        // resolve the stylesheet (and anything it references) through the core's resource loader
        final ResourceLoader loader = core.getResourceLoader();
        transFact.setURIResolver(new SystemIdResolver(loader).asURIResolver());
        xsltSource = new StreamSource(loader.openResource(xslt), SystemIdResolver.createSystemIdFromResourceName(xslt));
      } else {
        // fallback for tests
        xsltSource = new StreamSource(xslt);
      }
      transFact.setErrorListener(xmllog);
      try {
        xslTransformer = transFact.newTransformer(xsltSource);
      } finally {
        // some XML parsers are broken and don't close the byte stream (but they should according to spec)
        IOUtils.closeQuietly(xsltSource.getInputStream());
      }
      LOG.info("Using xslTransformer: " + xslTransformer.getClass().getName());
    } catch (Exception e) {
      throw new DataImportHandlerException(SEVERE, "Error initializing XSL ", e);
    }
  }

  if (useSolrAddXml) {
    // Support solr add documents
    xpathReader = new XPathRecordReader("/add/doc");
    xpathReader.addField("name", "/add/doc/field/@name", true);
    xpathReader.addField("value", "/add/doc/field", true);
  } else {
    String forEachXpath = context.getResolvedEntityAttribute(FOR_EACH);
    if (forEachXpath == null) {
      throw new DataImportHandlerException(SEVERE, "Entity : " + context.getEntityAttribute("name") + " must have a 'forEach' attribute");
    }
    if (forEachXpath.equals(context.getEntityAttribute(FOR_EACH))) {
      reinitXPathReader = true;
    }
    try {
      xpathReader = new XPathRecordReader(forEachXpath);
      for (Map<String, String> field : context.getAllEntityFields()) {
        final String rawXpath = field.get(XPATH);
        if (rawXpath == null) {
          // fields without an xpath are not read from the XML
          continue;
        }
        int flags = 0;
        if ("true".equals(field.get("flatten"))) {
          flags = XPathRecordReader.FLATTEN;
        }
        final String xpath = context.replaceTokens(rawXpath);
        // the xpath changed under token replacement, so flag the reader for re-initialization
        if (!xpath.equals(rawXpath) && !context.isRootEntity()) {
          reinitXPathReader = true;
        }
        xpathReader.addField(field.get(DataImporter.COLUMN), xpath, Boolean.parseBoolean(field.get(DataImporter.MULTI_VALUED)), flags);
      }
    } catch (RuntimeException e) {
      throw new DataImportHandlerException(SEVERE, "Exception while reading xpaths for fields", e);
    }
  }

  // remember which variables of the URL refer to this entity itself
  String url = context.getEntityAttribute(URL);
  List<String> urlVariables = url == null ? Collections.<String>emptyList() : resolver.getVariables(url);
  final String ownPrefix = entityName + ".";
  for (String variable : urlVariables) {
    if (variable.startsWith(ownPrefix)) {
      if (placeHolderVariables == null) {
        placeHolderVariables = new ArrayList<>();
      }
      placeHolderVariables.add(variable.substring(ownPrefix.length()));
    }
  }

  // remember the columns of all fields flagged as common fields
  for (Map<String, String> fld : context.getAllEntityFields()) {
    if ("true".equals(fld.get(COMMON_FIELD))) {
      if (commonFields == null) {
        commonFields = new ArrayList<>();
      }
      commonFields.add(fld.get(DataImporter.COLUMN));
    }
  }
}
Example use of org.apache.lucene.analysis.util.ResourceLoader in the Apache lucene-solr project.
From the class TestStopFilterFactory, method testInform.
/**
 * Builds the "Stop" token filter factory with a single words file, a comma-separated
 * pair of words files, a snowball-format words file and finally with no arguments,
 * checking the resulting stop-word set and the ignoreCase flag each time.
 */
public void testInform() throws Exception {
  final ResourceLoader resourceLoader = new ClasspathResourceLoader(getClass());
  assertTrue("loader is null and it shouldn't be", resourceLoader != null);

  // words=stop-1.txt
  final int singleFileSize = 2;
  StopFilterFactory stopFactory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt", "ignoreCase", "true");
  CharArraySet stopWords = stopFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", stopWords != null);
  assertTrue("words Size: " + stopWords.size() + " is not: " + singleFileSize, stopWords.size() == singleFileSize);
  assertTrue(stopFactory.isIgnoreCase() + " does not equal: " + true, stopFactory.isIgnoreCase());

  // words=stop-1.txt, stop-2.txt
  final int twoFilesSize = 4;
  stopFactory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-1.txt, stop-2.txt", "ignoreCase", "true");
  stopWords = stopFactory.getStopWords();
  assertTrue("words is null and it shouldn't be", stopWords != null);
  assertTrue("words Size: " + stopWords.size() + " is not: " + twoFilesSize, stopWords.size() == twoFilesSize);
  assertTrue(stopFactory.isIgnoreCase() + " does not equal: " + true, stopFactory.isIgnoreCase());

  // words=stop-snowball.txt, format=snowball
  stopFactory = (StopFilterFactory) tokenFilterFactory("Stop", "words", "stop-snowball.txt", "format", "snowball", "ignoreCase", "true");
  stopWords = stopFactory.getStopWords();
  assertEquals(8, stopWords.size());
  final String[] expectedWords = {"he", "him", "his", "himself", "she", "her", "hers", "herself"};
  for (String expectedWord : expectedWords) {
    assertTrue(stopWords.contains(expectedWord));
  }

  // defaults
  stopFactory = (StopFilterFactory) tokenFilterFactory("Stop");
  assertEquals(StopAnalyzer.ENGLISH_STOP_WORDS_SET, stopFactory.getStopWords());
  assertEquals(false, stopFactory.isIgnoreCase());
}
Example use of org.apache.lucene.analysis.util.ResourceLoader in the Apache lucene-solr project.
From the class TestMorfologikFilterFactory, method testMissingDictionary.
/**
 * Expects an {@link IOException} whose message contains "Resource not found" when a
 * factory configured with the dictionary "missing-dictionary-resource.dict" is
 * informed with a classpath resource loader.
 */
public void testMissingDictionary() throws Exception {
  final ResourceLoader classpathLoader = new ClasspathResourceLoader(TestMorfologikFilterFactory.class);

  IOException thrown = expectThrows(IOException.class, () -> {
    // construction and inform() both stay inside the lambda so any failure is seen by expectThrows
    Map<String, String> factoryArgs = new HashMap<>();
    factoryArgs.put(MorfologikFilterFactory.DICTIONARY_ATTRIBUTE, "missing-dictionary-resource.dict");
    new MorfologikFilterFactory(factoryArgs).inform(classpathLoader);
  });

  final String message = thrown.getMessage();
  assertTrue(message.contains("Resource not found"));
}
Aggregations