Use of org.apache.solr.analytics.request.ExpressionRequest in project lucene-solr by apache.
The example below is the create method of the class StatsCollectorSupplierFactory.
/**
* Builds a Supplier that will generate identical arrays of new StatsCollectors.
*
* @param schema The Schema being used.
* @param exRequests The expression requests to generate a StatsCollector[] from.
* @return A Supplier that will return an array of new StatsCollectors.
*/
@SuppressWarnings("unchecked")
public static Supplier<StatsCollector[]> create(IndexSchema schema, List<ExpressionRequest> exRequests) {
final Map<String, Set<String>> collectorStats = new TreeMap<>();
final Map<String, Set<Integer>> collectorPercs = new TreeMap<>();
final Map<String, ValueSource> collectorSources = new TreeMap<>();
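// All three maps are keyed by the string form of a ValueSource: the statistics requested for it,
// the percentiles requested for it, and the built ValueSource itself.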
// Iterate through all expression requests to make a list of ValueSource strings
// and statistics that need to be calculated on those ValueSources.
for (ExpressionRequest expRequest : exRequests) {
String statExpression = expRequest.getExpressionString();
Set<String> statistics = getStatistics(statExpression);
if (statistics == null) {
continue;
}
for (String statExp : statistics) {
String stat;
String operands;
try {
stat = statExp.substring(0, statExp.indexOf('(')).trim();
operands = statExp.substring(statExp.indexOf('(') + 1, statExp.lastIndexOf(')')).trim();
} catch (Exception e) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse statistic: [" + statExpression + "]", e);
}
String[] arguments = ExpressionFactory.getArguments(operands);
String source = arguments[0];
if (stat.equals(AnalyticsParams.STAT_PERCENTILE)) {
// The statistic is a percentile, extra parsing is required
if (arguments.length < 2) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Too few arguments given for " + stat + "() in [" + statExp + "].");
} else if (arguments.length > 2) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
}
source = arguments[1];
Set<Integer> percs = collectorPercs.get(source);
if (percs == null) {
percs = new HashSet<>();
collectorPercs.put(source, percs);
}
try {
int perc = Integer.parseInt(arguments[0]);
if (perc > 0 && perc < 100) {
percs.add(perc);
} else {
throw new SolrException(ErrorCode.BAD_REQUEST, "The percentile in [" + statExp + "] is not between 0 and 100, exculsive.");
}
} catch (NumberFormatException e) {
throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + arguments[0] + "\" cannot be converted into a percentile.", e);
}
} else if (arguments.length > 1) {
throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
} else if (arguments.length == 0) {
throw new SolrException(ErrorCode.BAD_REQUEST, "No arguments given for " + stat + "() in [" + statExp + "].");
}
// Only unique ValueSources will be made; therefore statistics must be accumulated for
// each ValueSource, even across different expression requests
Set<String> stats = collectorStats.get(source);
if (stats == null) {
stats = new HashSet<>();
collectorStats.put(source, stats);
}
if (AnalyticsParams.STAT_PERCENTILE.equals(stat)) {
stats.add(stat + "_" + arguments[0]);
} else {
stats.add(stat);
}
}
}
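// Second pass: build one ValueSource per unique source string and re-key the maps by the
// ValueSource's canonical string form.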
String[] keys = collectorStats.keySet().toArray(new String[0]);
for (String sourceStr : keys) {
// Build one ValueSource for each unique value source string
ValueSource source = buildSourceTree(schema, sourceStr);
if (source == null) {
throw new SolrException(ErrorCode.BAD_REQUEST, "The statistic [" + sourceStr + "] could not be parsed.");
}
String builtString = source.toString();
collectorSources.put(builtString, source);
// Replace the user given string with the correctly built string
if (!builtString.equals(sourceStr)) {
Set<String> stats = collectorStats.remove(sourceStr);
if (stats != null) {
collectorStats.put(builtString, stats);
}
Set<Integer> percs = collectorPercs.remove(sourceStr);
if (percs != null) {
collectorPercs.put(builtString, percs);
}
for (ExpressionRequest er : exRequests) {
er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString));
}
}
}
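// If no statistics were requested, the supplier only ever needs to hand out empty arrays.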
if (collectorSources.size() == 0) {
return new Supplier<StatsCollector[]>() {
@Override
public StatsCollector[] get() {
return new StatsCollector[0];
}
};
}
log.info("Stats objects: " + collectorStats.size() + " sr=" + collectorSources.size() + " pr=" + collectorPercs.size());
// All information is stored in final arrays so that nothing
// has to be computed when the Supplier's get() method is called.
final Set<String>[] statsArr = collectorStats.values().toArray(new Set[0]);
final ValueSource[] sourceArr = collectorSources.values().toArray(new ValueSource[0]);
final boolean[] uniqueBools = new boolean[statsArr.length];
final boolean[] medianBools = new boolean[statsArr.length];
final boolean[] numericBools = new boolean[statsArr.length];
final boolean[] dateBools = new boolean[statsArr.length];
final double[][] percsArr = new double[statsArr.length][];
final String[][] percsNames = new String[statsArr.length][];
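// Pre-compute, per ValueSource, which collector wrappers will be needed and which percentiles to gather.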
for (int count = 0; count < sourceArr.length; count++) {
uniqueBools[count] = statsArr[count].contains(AnalyticsParams.STAT_UNIQUE);
medianBools[count] = statsArr[count].contains(AnalyticsParams.STAT_MEDIAN);
numericBools[count] = statsArr[count].contains(AnalyticsParams.STAT_SUM) || statsArr[count].contains(AnalyticsParams.STAT_SUM_OF_SQUARES) || statsArr[count].contains(AnalyticsParams.STAT_MEAN) || statsArr[count].contains(AnalyticsParams.STAT_STANDARD_DEVIATION);
dateBools[count] = (sourceArr[count] instanceof DateFieldSource) | (sourceArr[count] instanceof MultiDateFunction) | (sourceArr[count] instanceof ConstDateSource);
Set<Integer> ps = collectorPercs.get(sourceArr[count].toString());
if (ps != null) {
percsArr[count] = new double[ps.size()];
percsNames[count] = new String[ps.size()];
int percCount = 0;
for (int p : ps) {
percsArr[count][percCount] = p / 100.0;
percsNames[count][percCount++] = AnalyticsParams.STAT_PERCENTILE + "_" + p;
}
}
}
// Making the Supplier
return new Supplier<StatsCollector[]>() {
public StatsCollector[] get() {
StatsCollector[] collectors = new StatsCollector[statsArr.length];
for (int count = 0; count < statsArr.length; count++) {
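// Start from a base collector and wrap it so a single collector can also serve
// unique, median and percentile statistics when they were requested.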
if (numericBools[count]) {
StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count]);
if (uniqueBools[count])
sc = new UniqueStatsCollector(sc);
if (medianBools[count])
sc = new MedianStatsCollector(sc);
if (percsArr[count] != null)
sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
collectors[count] = sc;
} else if (dateBools[count]) {
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
if (uniqueBools[count])
sc = new UniqueStatsCollector(sc);
if (medianBools[count])
sc = new DateMedianStatsCollector(sc);
if (percsArr[count] != null)
sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
collectors[count] = sc;
} else {
StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
if (uniqueBools[count])
sc = new UniqueStatsCollector(sc);
if (medianBools[count])
sc = new MedianStatsCollector(sc);
if (percsArr[count] != null)
sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
collectors[count] = sc;
}
}
return collectors;
}
};
}
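For orientation, a minimal usage sketch follows. It is not part of lucene-solr: the helper name and the schema/requests variables are illustrative only, and Supplier is whichever Supplier type the factory declares (Guava's in older branches). The point is that each get() call yields a fresh, structurally identical StatsCollector[], so every facet bucket can accumulate statistics independently.

// Hypothetical helper -- collectPerBucket, schema and requests are illustrative names only.
static void collectPerBucket(IndexSchema schema, List<ExpressionRequest> requests) {
  Supplier<StatsCollector[]> supplier = StatsCollectorSupplierFactory.create(schema, requests);
  // Every get() call builds a brand new array of collectors, so each facet
  // bucket can accumulate its statistics independently of the others.
  StatsCollector[] bucketA = supplier.get();
  StatsCollector[] bucketB = supplier.get();
  assert bucketA.length == bucketB.length;                // identical layout...
  assert bucketA.length == 0 || bucketA[0] != bucketB[0]; // ...but distinct collector instances
}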