use of org.apache.solr.common.SolrException in project lucene-solr by apache.
the class CarrotClusteringEngine method cluster.
/**
 * Runs the configured Carrot2 algorithm over the given documents and returns the
 * resulting clusters in the structure produced by {@code clustersToNamedList}.
 *
 * @param query the query; its {@code toString()} form is handed to Carrot2
 * @param solrDocList the documents to cluster
 * @param docIds passed through to {@code getDocuments} together with the documents
 * @param sreq the current request; its parameters may carry overriding Carrot2 attributes
 * @return the clusters converted by {@code clustersToNamedList}
 * @throws SolrException with {@code SERVER_ERROR} if anything fails; the failure is also logged
 */
@Override
public Object cluster(Query query, SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, SolrQueryRequest sreq) {
  try {
    // Assemble the attribute map handed to the Carrot2 controller.
    final Map<String, Object> carrotAttrs = new HashMap<>();
    final List<Document> carrotDocs = getDocuments(solrDocList, docIds, query, sreq);
    carrotAttrs.put(AttributeNames.DOCUMENTS, carrotDocs);
    carrotAttrs.put(AttributeNames.QUERY, query.toString());

    // Names of the fields clustering runs on.
    carrotAttrs.put("solrFieldNames", getFieldsForClustering(sreq));

    // Request-level attributes go in last so that they override the values set above.
    extractCarrotAttributes(sreq.getParams(), carrotAttrs);

    // Carrot2 resolves some classes (e.g. custom tokenizer/stemmer) through the
    // current thread's context class loader at runtime. Install the core's resource
    // class loader for the duration of the call so that classes from contrib JARs
    // are visible, and always restore the previous loader afterwards.
    final Thread currentThread = Thread.currentThread();
    final ClassLoader originalLoader = currentThread.getContextClassLoader();
    try {
      currentThread.setContextClassLoader(core.getResourceLoader().getClassLoader());
      return clustersToNamedList(
          controller.process(carrotAttrs, clusteringAlgorithmClass).getClusters(),
          sreq.getParams());
    } finally {
      currentThread.setContextClassLoader(originalLoader);
    }
  } catch (Exception e) {
    log.error("Carrot2 clustering failed", e);
    throw new SolrException(ErrorCode.SERVER_ERROR, "Carrot2 clustering failed", e);
  }
}
use of org.apache.solr.common.SolrException in project lucene-solr by apache.
the class CarrotClusteringEngine method getFieldsForClustering.
/**
 * Returns the names of fields that will be delivering the actual
 * content for clustering. Currently, there are two such fields: document
 * title and document content.
 *
 * <p>The title spec is read from {@code CarrotParams.TITLE_FIELD_NAME} (default
 * {@code "title"}) and the snippet spec from {@code CarrotParams.SNIPPET_FIELD_NAME}
 * (default: the title spec). Each spec may list several field names separated by
 * commas and/or spaces. Empty tokens, which arise from adjacent delimiters such as
 * {@code "title, content"}, are skipped so that the result never contains an empty
 * field name.
 *
 * @param sreq the request whose parameters name the fields
 * @return the union of title and snippet field names
 * @throws SolrException with {@code SERVER_ERROR} if the snippet field spec is blank
 */
private Set<String> getFieldsForClustering(SolrQueryRequest sreq) {
  SolrParams solrParams = sreq.getParams();
  String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
  String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
  if (StringUtils.isBlank(snippetFieldSpec)) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotParams.SNIPPET_FIELD_NAME + " must not be blank.");
  }
  final Set<String> fields = new HashSet<>();
  for (String fieldSpec : new String[] {titleFieldSpec, snippetFieldSpec}) {
    for (String fieldName : fieldSpec.split("[, ]")) {
      // split() emits "" between two adjacent delimiters (and for a blank spec);
      // an empty string is not a usable field name.
      if (!fieldName.isEmpty()) {
        fields.add(fieldName);
      }
    }
  }
  return fields;
}
use of org.apache.solr.common.SolrException in project lucene-solr by apache.
the class CarrotClusteringEngine method init.
/**
 * Initializes this clustering engine: builds the Carrot2 initialization attributes,
 * resolves the clustering algorithm class, initializes the Carrot2 controller and
 * records the name of the schema's unique key field.
 *
 * @param config the component configuration; converted to {@code SolrParams}
 * @param core the core this engine belongs to
 * @return whatever {@code super.init(config, core)} returned
 * @throws SolrException if the algorithm class lookup fails for a reason other than
 *     {@code ClassNotFoundException}, if the attributes XML cannot be read, or if the
 *     schema has no unique key field
 */
@Override
@SuppressWarnings("rawtypes")
public String init(NamedList config, final SolrCore core) {
  this.core = core;
  String result = super.init(config, core);
  final SolrParams initParams = SolrParams.toSolrParams(config);

  // Initialization attributes for Carrot2 controller.
  HashMap<String, Object> initAttributes = new HashMap<>();

  // Customize Carrot2's resource lookup to first look for resources
  // using Solr's resource loader. If that fails, try loading from the classpath.
  ResourceLookup resourceLookup = new ResourceLookup(
      // Solr-specific resource loading.
      new SolrResourceLocator(core, initParams),
      // Using the class loader directly because this time we want to omit the prefix
      new ClassLoaderLocator(core.getResourceLoader().getClassLoader()));
  DefaultLexicalDataFactoryDescriptor.attributeBuilder(initAttributes).resourceLookup(resourceLookup);

  // Make sure the requested Carrot2 clustering algorithm class is available
  String carrotAlgorithmClassName = initParams.get(CarrotParams.ALGORITHM);
  try {
    this.clusteringAlgorithmClass = core.getResourceLoader().findClass(carrotAlgorithmClassName, IClusteringAlgorithm.class);
  } catch (SolrException s) {
    // A missing class is tolerated: clusteringAlgorithmClass is simply not assigned.
    // NOTE(review): presumably availability is checked elsewhere before use - confirm.
    if (!(s.getCause() instanceof ClassNotFoundException)) {
      throw s;
    }
  }

  // Load Carrot2-Workbench exported attribute XMLs based on the 'name' attribute
  // of this component. This by-name convention lookup is used to simplify configuring algorithms.
  String componentName = initParams.get(ClusteringEngine.ENGINE_NAME);
  log.info("Initializing Clustering Engine '" + MoreObjects.firstNonNull(componentName, "<no 'name' attribute>") + "'");
  if (!Strings.isNullOrEmpty(componentName)) {
    IResource[] attributeXmls = resourceLookup.getAll(componentName + "-attributes.xml");
    if (attributeXmls.length > 0) {
      if (attributeXmls.length > 1) {
        log.warn("More than one attribute file found, first one will be used: " + Arrays.toString(attributeXmls));
      }
      Thread ct = Thread.currentThread();
      ClassLoader prev = ct.getContextClassLoader();
      try {
        ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
        // try-with-resources so the stream opened here is always closed,
        // whether or not deserialization succeeds.
        try (java.io.InputStream attributeXmlStream = attributeXmls[0].open()) {
          AttributeValueSets avs = AttributeValueSets.deserialize(attributeXmlStream);
          AttributeValueSet defaultSet = avs.getDefaultAttributeValueSet();
          initAttributes.putAll(defaultSet.getAttributeValues());
        }
      } catch (Exception e) {
        throw new SolrException(ErrorCode.SERVER_ERROR, "Could not read attributes XML for clustering component: " + componentName, e);
      } finally {
        ct.setContextClassLoader(prev);
      }
    }
  }

  // Extract solrconfig attributes, they take precedence.
  extractCarrotAttributes(initParams, initAttributes);

  // Customize the stemmer and tokenizer factories. The implementations we provide here
  // are included in the code base of Solr, so that it's possible to refactor
  // the Lucene APIs the factories rely on if needed.
  // Additionally, we set a custom lexical resource factory for Carrot2 that
  // will use both Carrot2 default stop words as well as stop words from
  // the StopFilter defined on the field.
  final AttributeBuilder attributeBuilder = BasicPreprocessingPipelineDescriptor.attributeBuilder(initAttributes);
  attributeBuilder.lexicalDataFactory(SolrStopwordsCarrot2LexicalDataFactory.class);
  // Only install the Lucene-based factories when the configuration did not choose its own.
  if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.TOKENIZER_FACTORY)) {
    attributeBuilder.tokenizerFactory(LuceneCarrot2TokenizerFactory.class);
  }
  if (!initAttributes.containsKey(BasicPreprocessingPipelineDescriptor.Keys.STEMMER_FACTORY)) {
    attributeBuilder.stemmerFactory(LuceneCarrot2StemmerFactory.class);
  }

  // Pass the schema (via the core) to SolrStopwordsCarrot2LexicalDataFactory.
  initAttributes.put("solrCore", core);

  // Carrot2 uses current thread's context class loader to get
  // certain classes (e.g. custom tokenizer/stemmer) at initialization time.
  // To make sure classes from contrib JARs are available,
  // we swap the context class loader for the time of initialization.
  Thread ct = Thread.currentThread();
  ClassLoader prev = ct.getContextClassLoader();
  try {
    ct.setContextClassLoader(core.getResourceLoader().getClassLoader());
    this.controller.init(initAttributes);
  } finally {
    ct.setContextClassLoader(prev);
  }

  SchemaField uniqueField = core.getLatestSchema().getUniqueKeyField();
  if (uniqueField == null) {
    throw new SolrException(SolrException.ErrorCode.SERVER_ERROR, CarrotClusteringEngine.class.getSimpleName() + " requires the schema to have a uniqueKeyField");
  }
  this.idFieldName = uniqueField.getName();
  return result;
}
use of org.apache.solr.common.SolrException in project lucene-solr by apache.
the class ExpressionFactory method create.
/**
 * Creates a single expression that contains delegate expressions and/or
 * a StatsCollector.
 * StatsCollectors are given as input and not created within the method so that
 * expressions can share the same StatsCollectors, minimizing computation.
 *
 * <p>The expression must have the form {@code operation(operands)}. Statistic
 * operations are bound to the collector whose value source string equals the
 * operands; constant operations produce constant expressions; every other
 * operation is built recursively from its arguments.
 *
 * @param expression String representation of the desired expression
 * @param statsCollectors List of StatsCollectors to build the expression with.
 * @return the expression
 * @throws SolrException with {@code BAD_REQUEST} if the expression is malformed, names
 *     an unknown value source, has the wrong number of arguments or is unsupported
 */
@SuppressWarnings("deprecation")
public static Expression create(String expression, StatsCollector[] statsCollectors) {
  int paren = expression.indexOf('(');
  if (paren <= 0) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "The expression [" + expression + "] has no arguments and is not supported.");
  }
  String topOperation = expression.substring(0, paren).trim();
  String operands;
  try {
    // substring() throws when ')' is absent or precedes the opening parenthesis.
    operands = expression.substring(paren + 1, expression.lastIndexOf(')')).trim();
  } catch (Exception e) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "Missing closing parenthesis in [" + expression + "]", e);
  }

  // Statistic
  if (AnalyticsParams.ALL_STAT_SET.contains(topOperation)) {
    if (topOperation.equals(AnalyticsParams.STAT_PERCENTILE)) {
      // A percentile has two parts, "percentile(<p>,<value source>)". Validate the
      // comma first: without it the substring calls below would fail with an
      // unchecked StringIndexOutOfBoundsException instead of a BAD_REQUEST.
      int comma = expression.indexOf(',');
      int closeParen = expression.lastIndexOf(')');
      if (comma < paren || comma > closeParen) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "The percentile expression [" + expression + "] requires a percentile and a value source separated by a comma.");
      }
      operands = expression.substring(comma + 1, closeParen).trim();
      topOperation = topOperation + "_" + expression.substring(paren + 1, comma).trim();
    }
    StatsCollector collector = null;
    // Finds the desired counter and builds an expression around it and the desired statistic.
    for (StatsCollector c : statsCollectors) {
      if (c.valueSourceString().equals(operands)) {
        collector = c;
        break;
      }
    }
    if (collector == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "ValueSource [" + operands + "] in Expression [" + expression + "] not found.");
    }
    return new BaseExpression(collector, topOperation);
  }

  // Constant
  if (topOperation.equals(AnalyticsParams.CONSTANT_NUMBER)) {
    try {
      return new ConstantNumberExpression(Double.parseDouble(operands));
    } catch (NumberFormatException e) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The constant " + operands + " cannot be converted into a number.", e);
    }
  } else if (topOperation.equals(AnalyticsParams.CONSTANT_DATE)) {
    return new ConstantDateExpression(DateMathParser.parseMath(null, operands));
  } else if (topOperation.equals(AnalyticsParams.CONSTANT_STRING)) {
    // String constants keep their surrounding whitespace, so re-extract without trim().
    operands = expression.substring(paren + 1, expression.lastIndexOf(')'));
    return new ConstantStringExpression(operands);
  }

  // Complex Delegating Expressions
  String[] arguments = getArguments(operands);
  Expression[] expArgs = new Expression[arguments.length];
  for (int count = 0; count < arguments.length; count++) {
    // Recursively builds delegate expressions
    expArgs[count] = create(arguments[count], statsCollectors);
  }

  // Single Delegate Expressions
  if (expArgs.length == 1) {
    // Numeric Expression
    if (topOperation.equals(AnalyticsParams.NEGATE)) {
      return new NegateExpression(expArgs[0]);
    }
    if (topOperation.equals(AnalyticsParams.ABSOLUTE_VALUE)) {
      return new AbsoluteValueExpression(expArgs[0]);
    }
    // String Expression
    if (topOperation.equals(AnalyticsParams.REVERSE)) {
      return new ReverseExpression(expArgs[0]);
    }
    throw new SolrException(ErrorCode.BAD_REQUEST, topOperation + " does not have the correct number of arguments.");
  }

  // Multi Delegate Expressions
  // Numeric Expression
  if (topOperation.equals(AnalyticsParams.ADD)) {
    return new AddExpression(expArgs);
  }
  if (topOperation.equals(AnalyticsParams.MULTIPLY)) {
    return new MultiplyExpression(expArgs);
  }
  // Date Expression
  if (topOperation.equals(AnalyticsParams.DATE_MATH)) {
    return new DateMathExpression(expArgs);
  }
  // String Expression
  if (topOperation.equals(AnalyticsParams.CONCATENATE)) {
    return new ConcatenateExpression(expArgs);
  }
  // Dual Delegate Expressions (all numeric)
  if (expArgs.length == 2) {
    if (topOperation.equals(AnalyticsParams.DIVIDE)) {
      return new DivideExpression(expArgs[0], expArgs[1]);
    }
    if (topOperation.equals(AnalyticsParams.POWER)) {
      return new PowerExpression(expArgs[0], expArgs[1]);
    }
    if (topOperation.equals(AnalyticsParams.LOG)) {
      return new LogExpression(expArgs[0], expArgs[1]);
    }
  }
  throw new SolrException(ErrorCode.BAD_REQUEST, topOperation + " does not have the correct number of arguments or is unsupported.");
}
use of org.apache.solr.common.SolrException in project lucene-solr by apache.
the class StatsCollectorSupplierFactory method buildNumericSource.
/**
 * Recursively parses and breaks down the expression string to build a numeric ValueSource.
 *
 * <p>An expression without parentheses is treated as a field name and delegated to
 * {@code buildFieldSource}. Otherwise the text before the first {@code '('} is the
 * operation and the text up to the last {@code ')'} holds its arguments.
 *
 * @param schema The schema to pull fields from.
 * @param expressionString The expression string to build a ValueSource from.
 * @return The value source represented by the given expressionString, or {@code null}
 *     when the operation is a date or string operation rather than a numeric one
 * @throws SolrException with {@code BAD_REQUEST} if the expression is malformed, has the
 *     wrong number of arguments, has a non-numeric argument or is unsupported
 */
private static ValueSource buildNumericSource(IndexSchema schema, String expressionString) {
  int paren = expressionString.indexOf('(');
  String[] arguments;
  String operands;
  if (paren < 0) {
    return buildFieldSource(schema, expressionString, NUMBER_TYPE);
  } else {
    try {
      // substring() throws when ')' is absent or precedes the opening parenthesis.
      operands = expressionString.substring(paren + 1, expressionString.lastIndexOf(')')).trim();
    } catch (Exception e) {
      // Keep the cause so the original failure stays visible in the stack trace.
      throw new SolrException(ErrorCode.BAD_REQUEST, "Missing closing parenthesis in [" + expressionString + "]", e);
    }
    arguments = ExpressionFactory.getArguments(operands);
  }
  String operation = expressionString.substring(0, paren).trim();

  if (operation.equals(AnalyticsParams.CONSTANT_NUMBER)) {
    if (arguments.length != 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The constant number declaration [" + expressionString + "] does not have exactly 1 argument.");
    }
    try {
      return new ConstDoubleSource(Double.parseDouble(arguments[0]));
    } catch (NumberFormatException e) {
      // A malformed constant is a client error, not a server failure.
      throw new SolrException(ErrorCode.BAD_REQUEST, "The constant " + arguments[0] + " cannot be converted into a number.", e);
    }
  } else if (operation.equals(AnalyticsParams.NEGATE)) {
    if (arguments.length != 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The negate operation [" + expressionString + "] does not have exactly 1 argument.");
    }
    ValueSource argSource = buildNumericSource(schema, arguments[0]);
    if (argSource == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The operation \"" + AnalyticsParams.NEGATE + "\" requires a numeric field or operation as argument. \"" + arguments[0] + "\" is not a numeric field or operation.");
    }
    return new NegateDoubleFunction(argSource);
  } else if (operation.equals(AnalyticsParams.ABSOLUTE_VALUE)) {
    if (arguments.length != 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The absolute value operation [" + expressionString + "] does not have exactly 1 argument.");
    }
    ValueSource argSource = buildNumericSource(schema, arguments[0]);
    if (argSource == null) {
      // Name the operation that actually failed (absolute value, not negate).
      throw new SolrException(ErrorCode.BAD_REQUEST, "The operation \"" + AnalyticsParams.ABSOLUTE_VALUE + "\" requires a numeric field or operation as argument. \"" + arguments[0] + "\" is not a numeric field or operation.");
    }
    return new AbsoluteValueDoubleFunction(argSource);
  } else if (operation.equals(AnalyticsParams.FILTER)) {
    return buildFilterSource(schema, operands, NUMBER_TYPE);
  }

  // Every remaining numeric operation takes numeric sub-expressions as arguments.
  List<ValueSource> subExpressions = new ArrayList<>();
  for (String argument : arguments) {
    ValueSource argSource = buildNumericSource(schema, argument);
    if (argSource == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The operation \"" + operation + "\" requires numeric fields or operations as arguments. \"" + argument + "\" is not a numeric field or operation.");
    }
    subExpressions.add(argSource);
  }

  if (operation.equals(AnalyticsParams.ADD)) {
    return new AddDoubleFunction(subExpressions.toArray(new ValueSource[0]));
  } else if (operation.equals(AnalyticsParams.MULTIPLY)) {
    return new MultiplyDoubleFunction(subExpressions.toArray(new ValueSource[0]));
  } else if (operation.equals(AnalyticsParams.DIVIDE)) {
    if (subExpressions.size() != 2) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The divide operation [" + expressionString + "] does not have exactly 2 arguments.");
    }
    return new DivDoubleFunction(subExpressions.get(0), subExpressions.get(1));
  } else if (operation.equals(AnalyticsParams.POWER)) {
    if (subExpressions.size() != 2) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The power operation [" + expressionString + "] does not have exactly 2 arguments.");
    }
    return new PowDoubleFunction(subExpressions.get(0), subExpressions.get(1));
  } else if (operation.equals(AnalyticsParams.LOG)) {
    if (subExpressions.size() != 2) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The log operation [" + expressionString + "] does not have exactly 2 arguments.");
    }
    return new LogDoubleFunction(subExpressions.get(0), subExpressions.get(1));
  }

  // Date and string operations are valid but not numeric: signal that with null
  // rather than an error.
  if (AnalyticsParams.DATE_OPERATION_SET.contains(operation) || AnalyticsParams.STRING_OPERATION_SET.contains(operation)) {
    return null;
  }
  throw new SolrException(ErrorCode.BAD_REQUEST, "The operation [" + expressionString + "] is not supported.");
}
Aggregations