Search in sources :

Example 1 with ExpressionRequest

use of org.apache.solr.analytics.request.ExpressionRequest in project lucene-solr by apache.

the class StatsCollectorSupplierFactory method create.

/**
 * Builds a Supplier that will generate identical arrays of new StatsCollectors.
 * <p>
 * Every statistic found in the expression requests is grouped by the value source it operates on,
 * so that exactly one collector chain is built per unique value source. User-given source strings
 * are replaced, both in the internal bookkeeping and in the expression requests themselves, by the
 * string form of the ValueSource that was built from them.
 *
 * @param schema The Schema being used.
 * @param exRequests The expression requests to generate a StatsCollector[] from. Their expression
 *                   strings may be rewritten to use the built value source strings.
 * @return A Supplier that will return an array of new StatsCollector.
 * @throws SolrException with {@code BAD_REQUEST} if a statistic cannot be parsed, is given the wrong
 *                       number of arguments, has an invalid percentile, or references a value
 *                       source that cannot be built.
 */
@SuppressWarnings("unchecked")
public static Supplier<StatsCollector[]> create(IndexSchema schema, List<ExpressionRequest> exRequests) {
    final Map<String, Set<String>> collectorStats = new TreeMap<>();
    final Map<String, Set<Integer>> collectorPercs = new TreeMap<>();
    final Map<String, ValueSource> collectorSources = new TreeMap<>();
    // Initialize the maps of all value source strings and the statistics (and percentiles)
    // that need to be calculated on those ValueSources.
    for (ExpressionRequest expRequest : exRequests) {
        String statExpression = expRequest.getExpressionString();
        Set<String> statistics = getStatistics(statExpression);
        if (statistics == null) {
            continue;
        }
        for (String statExp : statistics) {
            String stat;
            String operands;
            try {
                stat = statExp.substring(0, statExp.indexOf('(')).trim();
                operands = statExp.substring(statExp.indexOf('(') + 1, statExp.lastIndexOf(')')).trim();
            } catch (Exception e) {
                throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse statistic: [" + statExpression + "]", e);
            }
            String[] arguments = ExpressionFactory.getArguments(operands);
            final boolean isPercentile = stat.equals(AnalyticsParams.STAT_PERCENTILE);
            final String source;
            // The argument count is validated BEFORE any argument is read, so a malformed request
            // yields a BAD_REQUEST instead of an ArrayIndexOutOfBoundsException.
            if (isPercentile) {
                // The statistic is a percentile, extra parsing is required: percentile(<perc>, <source>)
                if (arguments.length < 2) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too few arguments given for " + stat + "() in [" + statExp + "].");
                } else if (arguments.length > 2) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
                }
                source = arguments[1];
                final int perc;
                try {
                    perc = Integer.parseInt(arguments[0]);
                } catch (NumberFormatException e) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + arguments[0] + "\" cannot be converted into a percentile.", e);
                }
                if (perc <= 0 || perc >= 100) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "The percentile in [" + statExp + "] is not between 0 and 100, exculsive.");
                }
                collectorPercs.computeIfAbsent(source, key -> new HashSet<>()).add(perc);
            } else {
                if (arguments.length == 0) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "No arguments given for " + stat + "() in [" + statExp + "].");
                } else if (arguments.length > 1) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
                }
                source = arguments[0];
            }
            // Only unique ValueSources will be made; therefore statistics must be accumulated for
            // each ValueSource, even across different expression requests
            Set<String> stats = collectorStats.computeIfAbsent(source, key -> new HashSet<>());
            if (isPercentile) {
                stats.add(stat + "_" + arguments[0]);
            } else {
                stats.add(stat);
            }
        }
    }
    // Iterate over a snapshot of the keys, since the map is re-keyed inside the loop.
    String[] keys = collectorStats.keySet().toArray(new String[0]);
    for (String sourceStr : keys) {
        // Build one ValueSource for each unique value source string
        ValueSource source = buildSourceTree(schema, sourceStr);
        if (source == null) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "The statistic [" + sourceStr + "] could not be parsed.");
        }
        String builtString = source.toString();
        collectorSources.put(builtString, source);
        // Replace the user given string with the correctly built string
        if (!builtString.equals(sourceStr)) {
            // Two different user strings can build to the same ValueSource string; merge their
            // statistics and percentiles rather than letting one overwrite the other.
            Set<String> stats = collectorStats.remove(sourceStr);
            if (stats != null) {
                Set<String> existingStats = collectorStats.get(builtString);
                if (existingStats == null) {
                    collectorStats.put(builtString, stats);
                } else {
                    existingStats.addAll(stats);
                }
            }
            Set<Integer> percs = collectorPercs.remove(sourceStr);
            if (percs != null) {
                Set<Integer> existingPercs = collectorPercs.get(builtString);
                if (existingPercs == null) {
                    collectorPercs.put(builtString, percs);
                } else {
                    existingPercs.addAll(percs);
                }
            }
            for (ExpressionRequest er : exRequests) {
                er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString));
            }
        }
    }
    if (collectorSources.size() == 0) {
        return () -> new StatsCollector[0];
    }
    log.info("Stats objects: " + collectorStats.size() + " sr=" + collectorSources.size() + " pr=" + collectorPercs.size());
    // All information is stored in final arrays so that nothing
    // has to be computed when the Supplier's get() method is called.
    // The arrays are filled from a single pass over collectorStats (sorted by built source string),
    // looking the ValueSource up by key, so that index i always refers to the same value source.
    final int size = collectorStats.size();
    final Set<String>[] statsArr = new Set[size];
    final ValueSource[] sourceArr = new ValueSource[size];
    final boolean[] uniqueBools = new boolean[size];
    final boolean[] medianBools = new boolean[size];
    final boolean[] numericBools = new boolean[size];
    final boolean[] dateBools = new boolean[size];
    final double[][] percsArr = new double[size][];
    final String[][] percsNames = new String[size][];
    int count = 0;
    for (Map.Entry<String, Set<String>> entry : collectorStats.entrySet()) {
        final Set<String> stats = entry.getValue();
        final ValueSource source = collectorSources.get(entry.getKey());
        statsArr[count] = stats;
        sourceArr[count] = source;
        uniqueBools[count] = stats.contains(AnalyticsParams.STAT_UNIQUE);
        medianBools[count] = stats.contains(AnalyticsParams.STAT_MEDIAN);
        numericBools[count] = stats.contains(AnalyticsParams.STAT_SUM)
            || stats.contains(AnalyticsParams.STAT_SUM_OF_SQUARES)
            || stats.contains(AnalyticsParams.STAT_MEAN)
            || stats.contains(AnalyticsParams.STAT_STANDARD_DEVIATION);
        dateBools[count] = (source instanceof DateFieldSource)
            || (source instanceof MultiDateFunction)
            || (source instanceof ConstDateSource);
        Set<Integer> ps = collectorPercs.get(entry.getKey());
        if (ps != null) {
            percsArr[count] = new double[ps.size()];
            percsNames[count] = new String[ps.size()];
            int percCount = 0;
            for (int p : ps) {
                // Percentiles are requested as whole percents but stored as fractions.
                percsArr[count][percCount] = p / 100.0;
                percsNames[count][percCount++] = AnalyticsParams.STAT_PERCENTILE + "_" + p;
            }
        }
        count++;
    }
    // Making the Supplier
    return new Supplier<StatsCollector[]>() {

        @Override
        public StatsCollector[] get() {
            StatsCollector[] collectors = new StatsCollector[statsArr.length];
            for (int i = 0; i < statsArr.length; i++) {
                // Numeric statistics take precedence over date handling when both apply.
                final boolean numeric = numericBools[i];
                StatsCollector sc;
                if (numeric) {
                    sc = new NumericStatsCollector(sourceArr[i], statsArr[i]);
                } else {
                    sc = new MinMaxStatsCollector(sourceArr[i], statsArr[i]);
                }
                if (uniqueBools[i]) {
                    sc = new UniqueStatsCollector(sc);
                }
                if (medianBools[i]) {
                    // Only non-numeric date sources use the date-specific median collector.
                    if (!numeric && dateBools[i]) {
                        sc = new DateMedianStatsCollector(sc);
                    } else {
                        sc = new MedianStatsCollector(sc);
                    }
                }
                if (percsArr[i] != null) {
                    sc = new PercentileStatsCollector(sc, percsArr[i], percsNames[i]);
                }
                collectors[i] = sc;
            }
            return collectors;
        }
    };
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) MultiDateFunction(org.apache.solr.analytics.util.valuesource.MultiDateFunction) ExpressionRequest(org.apache.solr.analytics.request.ExpressionRequest) DateFieldSource(org.apache.solr.analytics.util.valuesource.DateFieldSource) ConstDateSource(org.apache.solr.analytics.util.valuesource.ConstDateSource) Supplier(java.util.function.Supplier) SolrException(org.apache.solr.common.SolrException) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException) ValueSource(org.apache.lucene.queries.function.ValueSource)

Aggregations

HashSet (java.util.HashSet)1 Set (java.util.Set)1 TreeMap (java.util.TreeMap)1 Supplier (java.util.function.Supplier)1 ValueSource (org.apache.lucene.queries.function.ValueSource)1 ExpressionRequest (org.apache.solr.analytics.request.ExpressionRequest)1 ConstDateSource (org.apache.solr.analytics.util.valuesource.ConstDateSource)1 DateFieldSource (org.apache.solr.analytics.util.valuesource.DateFieldSource)1 MultiDateFunction (org.apache.solr.analytics.util.valuesource.MultiDateFunction)1 SolrException (org.apache.solr.common.SolrException)1