Search in sources :

Example 81 with Supplier

Use of java.util.function.Supplier in the lucene-solr project by Apache.

From the class StatsCollectorSupplierFactory, method create.

/**
   * Builds a Supplier that will generate identical arrays of new StatsCollectors.
   * 
   * @param schema The Schema being used.
   * @param exRequests The expression requests to generate a StatsCollector[] from.
   * @return A Supplier that will return an array of new StatsCollector.
   */
@SuppressWarnings("unchecked")
public static Supplier<StatsCollector[]> create(IndexSchema schema, List<ExpressionRequest> exRequests) {
    final Map<String, Set<String>> collectorStats = new TreeMap<>();
    final Map<String, Set<Integer>> collectorPercs = new TreeMap<>();
    final Map<String, ValueSource> collectorSources = new TreeMap<>();
    // and statistics that need to be calculated on those ValueSources.
    for (ExpressionRequest expRequest : exRequests) {
        String statExpression = expRequest.getExpressionString();
        Set<String> statistics = getStatistics(statExpression);
        if (statistics == null) {
            continue;
        }
        for (String statExp : statistics) {
            String stat;
            String operands;
            try {
                stat = statExp.substring(0, statExp.indexOf('(')).trim();
                operands = statExp.substring(statExp.indexOf('(') + 1, statExp.lastIndexOf(')')).trim();
            } catch (Exception e) {
                throw new SolrException(ErrorCode.BAD_REQUEST, "Unable to parse statistic: [" + statExpression + "]", e);
            }
            String[] arguments = ExpressionFactory.getArguments(operands);
            String source = arguments[0];
            if (stat.equals(AnalyticsParams.STAT_PERCENTILE)) {
                // The statistic is a percentile, extra parsing is required
                if (arguments.length < 2) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too few arguments given for " + stat + "() in [" + statExp + "].");
                } else if (arguments.length > 2) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
                }
                source = arguments[1];
                Set<Integer> percs = collectorPercs.get(source);
                if (percs == null) {
                    percs = new HashSet<>();
                    collectorPercs.put(source, percs);
                }
                try {
                    int perc = Integer.parseInt(arguments[0]);
                    if (perc > 0 && perc < 100) {
                        percs.add(perc);
                    } else {
                        throw new SolrException(ErrorCode.BAD_REQUEST, "The percentile in [" + statExp + "] is not between 0 and 100, exculsive.");
                    }
                } catch (NumberFormatException e) {
                    throw new SolrException(ErrorCode.BAD_REQUEST, "\"" + arguments[0] + "\" cannot be converted into a percentile.", e);
                }
            } else if (arguments.length > 1) {
                throw new SolrException(ErrorCode.BAD_REQUEST, "Too many arguments given for " + stat + "() in [" + statExp + "].");
            } else if (arguments.length == 0) {
                throw new SolrException(ErrorCode.BAD_REQUEST, "No arguments given for " + stat + "() in [" + statExp + "].");
            }
            // Only unique ValueSources will be made; therefore statistics must be accumulated for
            // each ValueSource, even across different expression requests
            Set<String> stats = collectorStats.get(source);
            if (stats == null) {
                stats = new HashSet<>();
                collectorStats.put(source, stats);
            }
            if (AnalyticsParams.STAT_PERCENTILE.equals(stat)) {
                stats.add(stat + "_" + arguments[0]);
            } else {
                stats.add(stat);
            }
        }
    }
    String[] keys = collectorStats.keySet().toArray(new String[0]);
    for (String sourceStr : keys) {
        // Build one ValueSource for each unique value source string
        ValueSource source = buildSourceTree(schema, sourceStr);
        if (source == null) {
            throw new SolrException(ErrorCode.BAD_REQUEST, "The statistic [" + sourceStr + "] could not be parsed.");
        }
        String builtString = source.toString();
        collectorSources.put(builtString, source);
        // Replace the user given string with the correctly built string
        if (!builtString.equals(sourceStr)) {
            Set<String> stats = collectorStats.remove(sourceStr);
            if (stats != null) {
                collectorStats.put(builtString, stats);
            }
            Set<Integer> percs = collectorPercs.remove(sourceStr);
            if (percs != null) {
                collectorPercs.put(builtString, percs);
            }
            for (ExpressionRequest er : exRequests) {
                er.setExpressionString(er.getExpressionString().replace(sourceStr, builtString));
            }
        }
    }
    if (collectorSources.size() == 0) {
        return new Supplier<StatsCollector[]>() {

            @Override
            public StatsCollector[] get() {
                return new StatsCollector[0];
            }
        };
    }
    log.info("Stats objects: " + collectorStats.size() + " sr=" + collectorSources.size() + " pr=" + collectorPercs.size());
    // All information is stored in final arrays so that nothing 
    // has to be computed when the Supplier's get() method is called.
    final Set<String>[] statsArr = collectorStats.values().toArray(new Set[0]);
    final ValueSource[] sourceArr = collectorSources.values().toArray(new ValueSource[0]);
    final boolean[] uniqueBools = new boolean[statsArr.length];
    final boolean[] medianBools = new boolean[statsArr.length];
    final boolean[] numericBools = new boolean[statsArr.length];
    final boolean[] dateBools = new boolean[statsArr.length];
    final double[][] percsArr = new double[statsArr.length][];
    final String[][] percsNames = new String[statsArr.length][];
    for (int count = 0; count < sourceArr.length; count++) {
        uniqueBools[count] = statsArr[count].contains(AnalyticsParams.STAT_UNIQUE);
        medianBools[count] = statsArr[count].contains(AnalyticsParams.STAT_MEDIAN);
        numericBools[count] = statsArr[count].contains(AnalyticsParams.STAT_SUM) || statsArr[count].contains(AnalyticsParams.STAT_SUM_OF_SQUARES) || statsArr[count].contains(AnalyticsParams.STAT_MEAN) || statsArr[count].contains(AnalyticsParams.STAT_STANDARD_DEVIATION);
        dateBools[count] = (sourceArr[count] instanceof DateFieldSource) | (sourceArr[count] instanceof MultiDateFunction) | (sourceArr[count] instanceof ConstDateSource);
        Set<Integer> ps = collectorPercs.get(sourceArr[count].toString());
        if (ps != null) {
            percsArr[count] = new double[ps.size()];
            percsNames[count] = new String[ps.size()];
            int percCount = 0;
            for (int p : ps) {
                percsArr[count][percCount] = p / 100.0;
                percsNames[count][percCount++] = AnalyticsParams.STAT_PERCENTILE + "_" + p;
            }
        }
    }
    // Making the Supplier
    return new Supplier<StatsCollector[]>() {

        public StatsCollector[] get() {
            StatsCollector[] collectors = new StatsCollector[statsArr.length];
            for (int count = 0; count < statsArr.length; count++) {
                if (numericBools[count]) {
                    StatsCollector sc = new NumericStatsCollector(sourceArr[count], statsArr[count]);
                    if (uniqueBools[count])
                        sc = new UniqueStatsCollector(sc);
                    if (medianBools[count])
                        sc = new MedianStatsCollector(sc);
                    if (percsArr[count] != null)
                        sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
                    collectors[count] = sc;
                } else if (dateBools[count]) {
                    StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
                    if (uniqueBools[count])
                        sc = new UniqueStatsCollector(sc);
                    if (medianBools[count])
                        sc = new DateMedianStatsCollector(sc);
                    if (percsArr[count] != null)
                        sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
                    collectors[count] = sc;
                } else {
                    StatsCollector sc = new MinMaxStatsCollector(sourceArr[count], statsArr[count]);
                    if (uniqueBools[count])
                        sc = new UniqueStatsCollector(sc);
                    if (medianBools[count])
                        sc = new MedianStatsCollector(sc);
                    if (percsArr[count] != null)
                        sc = new PercentileStatsCollector(sc, percsArr[count], percsNames[count]);
                    collectors[count] = sc;
                }
            }
            return collectors;
        }
    };
}
Also used : Set(java.util.Set) HashSet(java.util.HashSet) MultiDateFunction(org.apache.solr.analytics.util.valuesource.MultiDateFunction) ExpressionRequest(org.apache.solr.analytics.request.ExpressionRequest) DateFieldSource(org.apache.solr.analytics.util.valuesource.DateFieldSource) ConstDateSource(org.apache.solr.analytics.util.valuesource.ConstDateSource) Supplier(java.util.function.Supplier) SolrException(org.apache.solr.common.SolrException) TreeMap(java.util.TreeMap) SolrException(org.apache.solr.common.SolrException) ValueSource(org.apache.lucene.queries.function.ValueSource)

Aggregations

Supplier (java.util.function.Supplier)81 List (java.util.List)29 Test (org.junit.Test)28 Map (java.util.Map)16 Collectors (java.util.stream.Collectors)15 Function (java.util.function.Function)13 IOException (java.io.IOException)12 ArrayList (java.util.ArrayList)12 HashMap (java.util.HashMap)12 Arrays (java.util.Arrays)11 Consumer (java.util.function.Consumer)11 Assert.assertEquals (org.junit.Assert.assertEquals)10 Optional (java.util.Optional)9 Mockito.mock (org.mockito.Mockito.mock)6 Collections (java.util.Collections)5 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)5 Assert.assertFalse (org.junit.Assert.assertFalse)5 Matchers.any (org.mockito.Matchers.any)5 Mockito.when (org.mockito.Mockito.when)5 Maps (com.google.common.collect.Maps)4