use of org.carrot2.util.resource.IResource in project lucene-solr by apache.
the class CarrotClusteringEngine method init.
@Override
@SuppressWarnings("rawtypes")
public String init(NamedList config, final SolrCore core) {
this.core = core;
String result = super.init(config, core);
final SolrParams initParams = SolrParams.toSolrParams(config);
// Initialization attributes for Carrot2 controller.
HashMap<String, Object> initAttributes = new HashMap<>();
// Customize Carrot2's resource lookup to first look for resources
// using Solr's resource loader. If that fails, try loading from the classpath.
ResourceLookup resourceLookup = new ResourceLookup(// Solr-specific resource loading.
new SolrResourceLocator(core, initParams), // Using the class loader directly because this time we want to omit the prefix
new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(resourceLookup);
// Make sure the requested Carrot2 clustering algorithm class is available
String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
try {
this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
} catch (SolrException s) {
if (!(s.getCause() instanceof ClassNotFoundException)) {
throw s;
}
}
// Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
// of this component. This by-name convention lookup is used to simplify configuring algorithms.
String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
log.info("Initializing Clustering Engine '" + MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
if (!Strings.isNullOrEmpty(componentName)) {
IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
if (attributeXmls.length > 0) {
if (attributeXmls.length > 1) {
log.warn("More than one attribute file found, first one will be used: " + Arrays.toString(attributeXmls));
}
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmls[0].open());
AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
initAttributes.putAll(defaultSet.getAttributeValues());
} catch (Exception e) {
throw new SolrException(ErrorCode.SERVER_ERROR, "Could not read attributes XML for clustering component: " + componentName, e);
} finally {
ct.setContextClassLoader(prev);
}
}
}
// Extract solrconfig attributes, they take precedence.
extractCarrotAttributes(initParams, initAttributes);
// Customize the stemmer and tokenizer factories. The implementations we provide here
// are included in the code base of Solr, so that it's possible to refactor
// the Lucene APIs the factories rely on if needed.
// Additionally, we set a custom lexical resource factory for Carrot2 that
// will use both Carrot2 default stop words as well as stop words from
// the StopFilter defined on the field.
final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) {
attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
}
if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) {
attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class);
}
// Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
initAttributes.put("solrCore", core);
// Carrot2 uses current thread's context class loader to get
// certain classes (e.g. custom tokenizer/stemmer) at initialization time.
// To make sure classes from contrib JARs are available,
// we swap the context class loader for the time of clustering.
Thread ct = Thread.currentThread();
ClassLoader prev = ct.getContextClassLoader();
try {
ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
this.controller.init(initAttributes);
} finally {
ct.setContextClassLoader(prev);
}
SchemaField uniqueField = core.getLatestSchema().getUniqueKeyField();
if (uniqueField == null) {
throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotClusteringEngine.class.getSimpleName() + " requires the schema to have a uniqueKeyField");
}
this.idFieldName = uniqueField.getName();
return result;
}
use of org.carrot2.util.resource.IResource in project lucene-solr by apache.
the class SolrResourceLocator method getAll.
@Override
public IResource[] getAll(final String resource) {
final String resourceName = carrot2ResourcesDir + "/" + resource;
log.debug("Looking for Solr resource: " + resourceName);
InputStream resourceStream = null;
final byte[] asBytes;
try {
resourceStream = resourceLoader.openResource(resourceName);
asBytes = IOUtils.toByteArray(resourceStream);
} catch (IOException e) {
log.debug("Resource not found in Solr's config: " + resourceName + ". Using the default " + resource + " from Carrot JAR.");
return new IResource[] {};
} finally {
if (resourceStream != null) {
try {
resourceStream.close();
} catch (IOException e) {
// ignore.
}
}
}
log.info("Loaded Solr resource: " + resourceName);
final IResource foundResource = new IResource() {
@Override
public InputStream open() {
return new ByteArrayInputStream(asBytes);
}
@Override
public int hashCode() {
// so simply rely on instance equivalence.
return super.hashCode();
}
@Override
public boolean equals(Object obj) {
// so simply rely on instance equivalence.
return super.equals(obj);
}
@Override
public String toString() {
return "Solr config resource: " + resourceName;
}
};
return new IResource[] { foundResource };
}
Aggregations