Search in sources :

Example 1 with ConstDateSource

Use of org.apache.solr.analytics.util.valuesource.ConstDateSource in the lucene-solr project by Apache.

The method create of the class StatsCollectorSupplierFactory.

/**
   * Builds a Supplier that will generate identical arrays of new StatsCollectors.
   * <p>
   * Every statistic found in the expression requests is split into a statistic name and its
   * operands. Statistics are accumulated per source string, one ValueSource is built per unique
   * source string, and all per-source flags and percentile tables are precomputed so that the
   * returned Supplier only has to instantiate collectors.
   * 
   * @param schema The Schema being used.
   * @param exRequests The expression requests to generate a StatsCollector[] from. The expression
   *   string of each request is rewritten in place whenever a user-given source string differs
   *   from the string form of the ValueSource built from it.
   * @return A Supplier that will return an array of new StatsCollector.
   * @throws SolrException with BAD_REQUEST if a statistic cannot be parsed, has the wrong number
   *   of arguments, has an invalid percentile, or its source cannot be built.
   */
@SuppressWarnings("unchecked")
public static Supplier<StatsCollector[]> create(IndexSchema schema, List<ExpressionRequest> exRequests) {
    final Map<String, Set<String>> collectorStats = new TreeMap<>();
    final Map<String, Set<Integer>> collectorPercs = new TreeMap<>();
    final Map<String, ValueSource> collectorSources = new TreeMap<>();
    // Iterate through all expression requests to collect the source strings
    // and statistics that need to be calculated on those ValueSources.
    for (ExpressionRequest expRequest : exRequests) {
        String statExpression = expRequest.getExpressionString();
        Set<String> statistics = getStatistics(statExpression);
        if (statistics == null) {
            continue;
        }
        for (String statExp : statistics) {
            String stat;
            String operands;
            try {
                stat = statExp.substring(0, statExp.indexOf('(')).trim();
                operands = statExp.substring(statExp.indexOf('(') + 1, statExp.lastIndexOf(')')).trim();
            } catch (Exception e) {
                throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse statistic: [" + statExpression + "]", e);
            }
            String[] arguments = ExpressionFactory.getArguments(operands);
            final boolean isPercentile = AnalyticsParams.STAT_PERCENTILE.equals(stat);
            // Validate the argument count BEFORE indexing into the array, so that a missing
            // argument is reported as a BAD_REQUEST instead of an ArrayIndexOutOfBoundsException.
            final String source;
            if (isPercentile) {
                // The statistic is a percentile: percentile(<perc>, <source>)
                if (arguments.length < 2) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too few arguments given for " + stat + "() in [" + statExp + "].");
                } else if (arguments.length > 2) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
                }
                source = arguments[1];
                int perc;
                try {
                    perc = Integer.parseInt(arguments[0]);
                } catch (NumberFormatException e) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + arguments[0] + "\" cannot be converted into a percentile.", e);
                }
                if (perc <= 0 || perc >= 100) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "The percentile in [" + statExp + "] is not between 0 and 100, exclusive.");
                }
                collectorPercs.computeIfAbsent(source, key -> new HashSet<>()).add(perc);
            } else {
                if (arguments.length == 0) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "No arguments given for " + stat + "() in [" + statExp + "].");
                } else if (arguments.length > 1) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
                }
                source = arguments[0];
            }
            // Only unique ValueSources will be made; therefore statistics must be accumulated for
            // each ValueSource, even across different expression requests
            Set<String> stats = collectorStats.computeIfAbsent(source, key -> new HashSet<>());
            stats.add(isPercentile ? stat + "_" + arguments[0] : stat);
        }
    }
    // Iterate over a snapshot of the keys because the map is re-keyed inside the loop.
    String[] keys = collectorStats.keySet().toArray(new String[0]);
    for (String sourceStr : keys) {
        // Build one ValueSource for each unique value source string
        ValueSource source = buildSourceTree(schema, sourceStr);
        if (source == null) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "The statistic [" + sourceStr + "] could not be parsed.");
        }
        String builtString = source.toString();
        collectorSources.put(builtString, source);
        // Replace the user given string with the correctly built string
        if (!builtString.equals(sourceStr)) {
            // Merge rather than overwrite: two different user strings may build to the same
            // string, and the statistics requested for both must be kept.
            Set<String> stats = collectorStats.remove(sourceStr);
            if (stats != null) {
                collectorStats.merge(builtString, stats, (existing, moved) -> {
                    existing.addAll(moved);
                    return existing;
                });
            }
            Set<Integer> percs = collectorPercs.remove(sourceStr);
            if (percs != null) {
                collectorPercs.merge(builtString, percs, (existing, moved) -> {
                    existing.addAll(moved);
                    return existing;
                });
            }
            for (ExpressionRequest er : exRequests) {
                er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString));
            }
        }
    }
    if (collectorSources.isEmpty()) {
        return () -> new StatsCollector[0];
    }
    log.info("Stats objects: " + collectorStats.size() + " sr=" + collectorSources.size() + " pr=" + collectorPercs.size());
    // All information is stored in final arrays so that nothing 
    // has to be computed when the Supplier's get() method is called.
    // Both maps are TreeMaps keyed by the built source string, so values() of each
    // is in the same key order and index i refers to the same source in every array.
    final Set<String>[] statsArr = collectorStats.values().toArray(new Set[0]);
    final ValueSource[] sourceArr = collectorSources.values().toArray(new ValueSource[0]);
    final boolean[] uniqueBools = new boolean[statsArr.length];
    final boolean[] medianBools = new boolean[statsArr.length];
    final boolean[] numericBools = new boolean[statsArr.length];
    final boolean[] dateBools = new boolean[statsArr.length];
    final double[][] percsArr = new double[statsArr.length][];
    final String[][] percsNames = new String[statsArr.length][];
    for (int count = 0; count < sourceArr.length; count++) {
        final Set<String> stats = statsArr[count];
        final ValueSource source = sourceArr[count];
        uniqueBools[count] = stats.contains(AnalyticsParams.STAT_UNIQUE);
        medianBools[count] = stats.contains(AnalyticsParams.STAT_MEDIAN);
        numericBools[count] = stats.contains(AnalyticsParams.STAT_SUM)
            || stats.contains(AnalyticsParams.STAT_SUM_OF_SQUARES)
            || stats.contains(AnalyticsParams.STAT_MEAN)
            || stats.contains(AnalyticsParams.STAT_STANDARD_DEVIATION);
        dateBools[count] = source instanceof DateFieldSource
            || source instanceof MultiDateFunction
            || source instanceof ConstDateSource;
        Set<Integer> ps = collectorPercs.get(source.toString());
        if (ps != null) {
            percsArr[count] = new double[ps.size()];
            percsNames[count] = new String[ps.size()];
            int percCount = 0;
            for (int p : ps) {
                // Percentiles are requested as integers in (0, 100) and stored as fractions.
                percsArr[count][percCount] = p / 100.0;
                percsNames[count][percCount++] = AnalyticsParams.STAT_PERCENTILE + "_" + p;
            }
        }
    }
    // Making the Supplier
    return () -> {
        StatsCollector[] collectors = new StatsCollector[statsArr.length];
        for (int count = 0; count < statsArr.length; count++) {
            // Base collector: numeric statistics take precedence over the date/other handling.
            StatsCollector sc;
            if (numericBools[count]) {
                sc = new NumericStatsCollector(sourceArr[count], statsArr[count]);
            } else {
                sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
            }
            if (uniqueBools[count]) {
                sc = new UniqueStatsCollector(sc);
            }
            if (medianBools[count]) {
                // Only non-numeric date sources use the date-specific median collector.
                if (!numericBools[count] && dateBools[count]) {
                    sc = new DateMedianStatsCollector(sc);
                } else {
                    sc = new MedianStatsCollector(sc);
                }
            }
            if (percsArr[count] != null) {
                sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
            }
            collectors[count] = sc;
        }
        return collectors;
    };
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) MultiDateFunction(org.apache.solr.analytics.util.valuesource.MultiDateFunction) ExpressionRequest(org.apache.solr.analytics.request.ExpressionRequest) DateFieldSource(org.apache.solr.analytics.util.valuesource.DateFieldSource) ConstDateSource(org.apache.solr.analytics.util.valuesource.ConstDateSource) Supplier(java.util.function.Supplier) SolrException(org.apache.solr.common.SolrException) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException) ValueSource(org.apache.lucene.queries.function.ValueSource)

Example 2 with ConstDateSource

Use of org.apache.solr.analytics.util.valuesource.ConstDateSource in the lucene-solr project by Apache.

The method buildDateSource of the class StatsCollectorSupplierFactory.

/**
   * Recursively parses and breaks down the expression string to build a date ValueSource.
   * <p>
   * An expression without a parenthesis is handed to buildFieldSource. Otherwise the text before
   * the first '(' is the operation and the text between the first '(' and the last ')' is its
   * argument list.
   * 
   * @param schema The schema to pull fields from.
   * @param expressionString The expression string to build a ValueSource from.
   * @return The value source represented by the given expressionString, or null if the operation
   *   is a known numeric or string operation (i.e. not a date operation).
   * @throws SolrException with BAD_REQUEST if the expression is malformed, has an invalid number
   *   of arguments, or names an unsupported operation.
   */
@SuppressWarnings("deprecation")
private static ValueSource buildDateSource(IndexSchema schema, String expressionString) {
    int paren = expressionString.indexOf('(');
    if (paren < 0) {
        return buildFieldSource(schema, expressionString, DATE_TYPE);
    }
    // Reject a missing or misplaced ')' up front; otherwise substring() below would throw a
    // StringIndexOutOfBoundsException instead of reporting a bad request.
    int closeParen = expressionString.lastIndexOf(')');
    if (closeParen < paren) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse expression: [" + expressionString + "]");
    }
    String[] arguments = ExpressionFactory.getArguments(expressionString.substring(paren + 1, closeParen).trim());
    // Guard the arguments[0] access below.
    if (arguments.length == 0) {
        throw new SolrException(ErrorCode.BAD_REQUEST, "No arguments given in [" + expressionString + "].");
    }
    String operands = arguments[0];
    String operation = expressionString.substring(0, paren).trim();
    if (operation.equals(AnalyticsParams.CONSTANT_DATE)) {
        if (arguments.length != 1) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "The constant date declaration [" + expressionString + "] does not have exactly 1 argument.");
        }
        return new ConstDateSource(DateMathParser.parseMath(null, operands));
    }
    if (operation.equals(AnalyticsParams.FILTER)) {
        return buildFilterSource(schema, operands, DATE_TYPE);
    }
    if (operation.equals(AnalyticsParams.DATE_MATH)) {
        // The first argument must be a date source; every following argument a string source.
        List<ValueSource> subExpressions = new ArrayList<>(arguments.length);
        for (int i = 0; i < arguments.length; i++) {
            String argument = arguments[i];
            ValueSource argSource;
            if (i == 0) {
                argSource = buildDateSource(schema, argument);
                if (argSource == null) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + AnalyticsParams.DATE_MATH + "\" requires the first argument be a date operation or field. [" + argument + "] is not a date operation or field.");
                }
            } else {
                argSource = buildStringSource(schema, argument);
                if (argSource == null) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + AnalyticsParams.DATE_MATH + "\" requires that all arguments except the first be string operations. [" + argument + "] is not a string operation.");
                }
            }
            subExpressions.add(argSource);
        }
        return new DateMathFunction(subExpressions.toArray(new ValueSource[0]));
    }
    // Known numeric/string operations are not date sources; signal that with null so the
    // caller can decide how to proceed.
    if (AnalyticsParams.NUMERIC_OPERATION_SET.contains(operation) || AnalyticsParams.STRING_OPERATION_SET.contains(operation)) {
        return null;
    }
    throw new SolrException(ErrorCode.BAD_REQUEST, "The operation [" + expressionString + "] is not supported.");
}
Also used : ValueSource(org.apache.lucene.queries.function.ValueSource) ArrayList(java.util.ArrayList) ConstDateSource(org.apache.solr.analytics.util.valuesource.ConstDateSource) DateMathFunction(org.apache.solr.analytics.util.valuesource.DateMathFunction) SolrException(org.apache.solr.common.SolrException)

Aggregations

ValueSource (org.apache.lucene.queries.function.ValueSource)2 ConstDateSource (org.apache.solr.analytics.util.valuesource.ConstDateSource)2 SolrException (org.apache.solr.common.SolrException)2 ArrayList (java.util.ArrayList)1 HashSet (java.util.HashSet)1 Set (java.util.Set)1 TreeMap (java.util.TreeMap)1 Supplier (java.util.function.Supplier)1 ExpressionRequest (org.apache.solr.analytics.request.ExpressionRequest)1 DateFieldSource (org.apache.solr.analytics.util.valuesource.DateFieldSource)1 DateMathFunction (org.apache.solr.analytics.util.valuesource.DateMathFunction)1 MultiDateFunction (org.apache.solr.analytics.util.valuesource.MultiDateFunction)1