Use of org.apache.solr.analytics.util.valuesource.ConstDateSource in the project lucene-solr by apache.
The class StatsCollectorSupplierFactory, method create.
/**
 * Builds a Supplier that will generate identical arrays of new StatsCollectors.
 *
 * <p>The work is done in three phases:
 * <ol>
 *   <li>Every statistic found in the expression requests is parsed into a statistic name and a
 *       value source string; statistics (and percentiles) are accumulated per source string.</li>
 *   <li>One ValueSource is built per unique source string. The user given string is replaced by
 *       the built source's {@code toString()} form, both in the bookkeeping maps and in the
 *       expression strings of the given requests (the requests are modified in place).</li>
 *   <li>Everything is flattened into parallel arrays so that nothing has to be computed when
 *       the Supplier's {@code get()} method is called.</li>
 * </ol>
 *
 * @param schema The Schema being used.
 * @param exRequests The expression requests to generate a StatsCollector[] from.
 * @return A Supplier that will return an array of new StatsCollector.
 * @throws SolrException with {@code BAD_REQUEST} if a statistic cannot be parsed, has the wrong
 *         number of arguments, has an invalid percentile, or its source cannot be built.
 */
@SuppressWarnings("unchecked")
public static Supplier<StatsCollector[]> create(IndexSchema schema, List<ExpressionRequest> exRequests) {
  final Map<String, Set<String>> collectorStats = new TreeMap<>();
  final Map<String, Set<Integer>> collectorPercs = new TreeMap<>();
  final Map<String, ValueSource> collectorSources = new TreeMap<>();

  // Phase 1: collect all ValueSource strings to be calculated
  // and statistics that need to be calculated on those ValueSources.
  for (ExpressionRequest expRequest : exRequests) {
    String statExpression = expRequest.getExpressionString();
    Set<String> statistics = getStatistics(statExpression);
    if (statistics == null) {
      continue;
    }
    for (String statExp : statistics) {
      String stat;
      String operands;
      try {
        // A statistic has the shape "name(operands)"; a missing parenthesis makes
        // substring() throw, which is reported as a bad request.
        stat = statExp.substring(0, statExp.indexOf('(')).trim();
        operands = statExp.substring(statExp.indexOf('(') + 1, statExp.lastIndexOf(')')).trim();
      } catch (Exception e) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse statistic: [" + statExpression + "]", e);
      }
      String[] arguments = ExpressionFactory.getArguments(operands);

      // The argument count is validated BEFORE any argument is indexed, so that an empty
      // argument list is reported as a bad request instead of an ArrayIndexOutOfBoundsException.
      final String source;
      if (AnalyticsParams.STAT_PERCENTILE.equals(stat)) {
        // The statistic is a percentile, extra parsing is required:
        // arguments[0] is the percentile, arguments[1] is the value source.
        if (arguments.length < 2) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "Too few arguments given for " + stat + "() in [" + statExp + "].");
        } else if (arguments.length > 2) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
        }
        source = arguments[1];
        final int perc;
        try {
          perc = Integer.parseInt(arguments[0]);
        } catch (NumberFormatException e) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + arguments[0] + "\" cannot be converted into a percentile.", e);
        }
        if (perc <= 0 || perc >= 100) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "The percentile in [" + statExp + "] is not between 0 and 100, exculsive.");
        }
        Set<Integer> percs = collectorPercs.get(source);
        if (percs == null) {
          percs = new HashSet<>();
          collectorPercs.put(source, percs);
        }
        percs.add(perc);
      } else {
        if (arguments.length == 0) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "No arguments given for " + stat + "() in [" + statExp + "].");
        } else if (arguments.length > 1) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
        }
        source = arguments[0];
      }

      // Only unique ValueSources will be made; therefore statistics must be accumulated for
      // each ValueSource, even across different expression requests
      Set<String> stats = collectorStats.get(source);
      if (stats == null) {
        stats = new HashSet<>();
        collectorStats.put(source, stats);
      }
      if (AnalyticsParams.STAT_PERCENTILE.equals(stat)) {
        stats.add(stat + "_" + arguments[0]);
      } else {
        stats.add(stat);
      }
    }
  }

  // Phase 2: build the ValueSources. The keys are snapshotted into an array because
  // collectorStats is modified while walking over them.
  String[] keys = collectorStats.keySet().toArray(new String[0]);
  for (String sourceStr : keys) {
    // Build one ValueSource for each unique value source string
    ValueSource source = buildSourceTree(schema, sourceStr);
    if (source == null) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The statistic [" + sourceStr + "] could not be parsed.");
    }
    String builtString = source.toString();
    collectorSources.put(builtString, source);
    // Replace the user given string with the correctly built string
    if (!builtString.equals(sourceStr)) {
      // Several user given strings may build to the same string; merge into any entry that is
      // already stored under the built string instead of overwriting (and losing) it.
      Set<String> stats = collectorStats.remove(sourceStr);
      if (stats != null) {
        Set<String> existingStats = collectorStats.get(builtString);
        if (existingStats == null) {
          collectorStats.put(builtString, stats);
        } else {
          existingStats.addAll(stats);
        }
      }
      Set<Integer> percs = collectorPercs.remove(sourceStr);
      if (percs != null) {
        Set<Integer> existingPercs = collectorPercs.get(builtString);
        if (existingPercs == null) {
          collectorPercs.put(builtString, percs);
        } else {
          existingPercs.addAll(percs);
        }
      }
      for (ExpressionRequest er : exRequests) {
        er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString));
      }
    }
  }

  if (collectorSources.isEmpty()) {
    return new Supplier<StatsCollector[]>() {
      @Override
      public StatsCollector[] get() {
        return new StatsCollector[0];
      }
    };
  }
  log.info("Stats objects: " + collectorStats.size() + " sr=" + collectorSources.size() + " pr=" + collectorPercs.size());

  // Phase 3: all information is stored in final arrays so that nothing
  // has to be computed when the Supplier's get() method is called.
  // The arrays are filled from a single walk over collectorSources, looking the statistics and
  // percentiles up by the same key, so index i always refers to the same value source.
  final int size = collectorSources.size();
  final Set<String>[] statsArr = new Set[size];
  final ValueSource[] sourceArr = new ValueSource[size];
  final boolean[] uniqueBools = new boolean[size];
  final boolean[] medianBools = new boolean[size];
  final boolean[] numericBools = new boolean[size];
  final boolean[] dateBools = new boolean[size];
  final double[][] percsArr = new double[size][];
  final String[][] percsNames = new String[size][];
  int count = 0;
  for (Map.Entry<String, ValueSource> entry : collectorSources.entrySet()) {
    final ValueSource source = entry.getValue();
    final Set<String> stats = collectorStats.get(entry.getKey());
    sourceArr[count] = source;
    statsArr[count] = stats;
    uniqueBools[count] = stats.contains(AnalyticsParams.STAT_UNIQUE);
    medianBools[count] = stats.contains(AnalyticsParams.STAT_MEDIAN);
    numericBools[count] = stats.contains(AnalyticsParams.STAT_SUM)
        || stats.contains(AnalyticsParams.STAT_SUM_OF_SQUARES)
        || stats.contains(AnalyticsParams.STAT_MEAN)
        || stats.contains(AnalyticsParams.STAT_STANDARD_DEVIATION);
    dateBools[count] = (source instanceof DateFieldSource)
        || (source instanceof MultiDateFunction)
        || (source instanceof ConstDateSource);
    Set<Integer> ps = collectorPercs.get(entry.getKey());
    if (ps != null) {
      percsArr[count] = new double[ps.size()];
      percsNames[count] = new String[ps.size()];
      int percCount = 0;
      for (int p : ps) {
        // Percentiles are requested as whole numbers but stored as fractions.
        percsArr[count][percCount] = p / 100.0;
        percsNames[count][percCount++] = AnalyticsParams.STAT_PERCENTILE + "_" + p;
      }
    }
    count++;
  }

  // Making the Supplier
  return new Supplier<StatsCollector[]>() {
    @Override
    public StatsCollector[] get() {
      StatsCollector[] collectors = new StatsCollector[statsArr.length];
      for (int i = 0; i < statsArr.length; i++) {
        // Base collector: numeric statistics take precedence over everything else.
        StatsCollector sc;
        if (numericBools[i]) {
          sc = new NumericStatsCollector(sourceArr[i], statsArr[i]);
        } else {
          sc = new MinMaxStatsCollector(sourceArr[i], statsArr[i]);
        }
        if (uniqueBools[i]) {
          sc = new UniqueStatsCollector(sc);
        }
        if (medianBools[i]) {
          // Only a non-numeric date source gets the date specific median collector.
          if (!numericBools[i] && dateBools[i]) {
            sc = new DateMedianStatsCollector(sc);
          } else {
            sc = new MedianStatsCollector(sc);
          }
        }
        if (percsArr[i] != null) {
          sc = new PercentileStatsCollector(sc, percsArr[i], percsNames[i]);
        }
        collectors[i] = sc;
      }
      return collectors;
    }
  };
}
Use of org.apache.solr.analytics.util.valuesource.ConstDateSource in the project lucene-solr by apache.
The class StatsCollectorSupplierFactory, method buildDateSource.
/**
 * Recursively parses and breaks down the expression string to build a date ValueSource.
 *
 * <p>An expression without parentheses is handed to {@code buildFieldSource} as a date field.
 * Otherwise the text before the first '(' is the operation and the text between the first '('
 * and the last ')' holds its arguments. Constant date, filter and date math operations are
 * handled here.
 *
 * @param schema The schema to pull fields from.
 * @param expressionString The expression string to build a ValueSource from.
 * @return The value source represented by the given expressionString, or {@code null} if the
 *         operation is a known numeric or string operation (i.e. not a date operation).
 * @throws SolrException with {@code BAD_REQUEST} if the parentheses are unbalanced, the
 *         operation has no arguments, the arguments are invalid for the operation, or the
 *         operation is not supported.
 */
@SuppressWarnings("deprecation")
private static ValueSource buildDateSource(IndexSchema schema, String expressionString) {
  final int paren = expressionString.indexOf('(');
  if (paren < 0) {
    // No operation present; the whole expression names a field.
    return buildFieldSource(schema, expressionString, DATE_TYPE);
  }

  // Without this check a missing (or misplaced) ')' would surface as a
  // StringIndexOutOfBoundsException from substring() instead of a bad request.
  final int closeParen = expressionString.lastIndexOf(')');
  if (closeParen < paren) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "The expression [" + expressionString + "] has unbalanced parentheses.");
  }

  final String[] arguments = ExpressionFactory.getArguments(expressionString.substring(paren + 1, closeParen).trim());
  final String operation = expressionString.substring(0, paren).trim();

  // Every operation below reads the first argument; validate before indexing into the array.
  if (arguments.length == 0) {
    throw new SolrException(ErrorCode.BAD_REQUEST, "The operation [" + expressionString + "] does not have any arguments.");
  }
  final String operands = arguments[0];

  if (operation.equals(AnalyticsParams.CONSTANT_DATE)) {
    if (arguments.length != 1) {
      throw new SolrException(ErrorCode.BAD_REQUEST, "The constant date declaration [" + expressionString + "] does not have exactly 1 argument.");
    }
    return new ConstDateSource(DateMathParser.parseMath(null, operands));
  }

  if (operation.equals(AnalyticsParams.FILTER)) {
    return buildFilterSource(schema, operands, DATE_TYPE);
  }

  if (operation.equals(AnalyticsParams.DATE_MATH)) {
    // The first argument must be a date source; all remaining arguments must be string sources.
    List<ValueSource> subExpressions = new ArrayList<>();
    for (int i = 0; i < arguments.length; i++) {
      final String argument = arguments[i];
      final ValueSource argSource;
      if (i == 0) {
        argSource = buildDateSource(schema, argument);
        if (argSource == null) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + AnalyticsParams.DATE_MATH + "\" requires the first argument be a date operation or field. [" + argument + "] is not a date operation or field.");
        }
      } else {
        argSource = buildStringSource(schema, argument);
        if (argSource == null) {
          throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + AnalyticsParams.DATE_MATH + "\" requires that all arguments except the first be string operations. [" + argument + "] is not a string operation.");
        }
      }
      subExpressions.add(argSource);
    }
    return new DateMathFunction(subExpressions.toArray(new ValueSource[0]));
  }

  // A known numeric or string operation is not an error here; null signals "not a date source".
  if (AnalyticsParams.NUMERIC_OPERATION_SET.contains(operation) || AnalyticsParams.STRING_OPERATION_SET.contains(operation)) {
    return null;
  }
  throw new SolrException(ErrorCode.BAD_REQUEST, "The operation [" + expressionString + "] is not supported.");
}
Aggregations