Search in sources :

Example 66 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testParallelExecutorStream.

/**
 * Indexes 500 documents into {@code mainCorpus} and 500 matching {@code update(...)} expressions
 * into {@code workQueue}, runs the queued expressions through a parallel executor stream, and
 * then checks that {@code destination} holds all 500 documents with the expected field values.
 *
 * <p>The client cache and the three collections created here are released in {@code finally}
 * blocks so that a failing assertion does not leak them into the shared test cluster.
 */
@Test
public void testParallelExecutorStream() throws Exception {
    CollectionAdminRequest.createCollection("workQueue", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("workQueue", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    CollectionAdminRequest.createCollection("mainCorpus", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("mainCorpus", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    CollectionAdminRequest.createCollection("destination", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("destination", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);

    SolrClientCache clientCache = new SolrClientCache();
    try {
        // One work item per corpus document: each work item is an update() expression that
        // copies exactly one document (selected by id) from mainCorpus into destination.
        UpdateRequest workRequest = new UpdateRequest();
        UpdateRequest dataRequest = new UpdateRequest();
        for (int i = 0; i < 500; i++) {
            workRequest.add(id, String.valueOf(i), "expr_s", "update(destination, batchSize=50, search(mainCorpus, q=id:" + i + ", rows=1, sort=\"id asc\", fl=\"id, body_t, field_i\"))");
            dataRequest.add(id, String.valueOf(i), "body_t", "hello world " + i, "field_i", Integer.toString(i));
        }
        workRequest.commit(cluster.getSolrClient(), "workQueue");
        dataRequest.commit(cluster.getSolrClient(), "mainCorpus");

        String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/destination";
        StreamFactory factory = new StreamFactory()
            .withCollectionZkHost("workQueue", cluster.getZkServer().getZkAddress())
            .withCollectionZkHost("mainCorpus", cluster.getZkServer().getZkAddress())
            .withCollectionZkHost("destination", cluster.getZkServer().getZkAddress())
            .withFunctionName("search", CloudSolrStream.class)
            .withFunctionName("executor", ExecutorStream.class)
            .withFunctionName("parallel", ParallelStream.class)
            .withFunctionName("update", UpdateStream.class);

        String executorExpression = "parallel(workQueue, workers=2, sort=\"EOF asc\", executor(threads=3, queueSize=100, search(workQueue, q=\"*:*\", fl=\"id, expr_s\", rows=1000, partitionKeys=id, sort=\"id desc\")))";
        TupleStream executorStream = factory.constructStream(executorExpression);
        StreamContext context = new StreamContext();
        context.setSolrClientCache(clientCache);
        executorStream.setStreamContext(context);
        // Draining the stream is what runs the queued expressions; the returned tuples are not needed.
        getTuples(executorStream);

        // Destination collection should now contain all the records in the main corpus.
        cluster.getSolrClient().commit("destination");
        ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
        paramsLoc.set("expr", "search(destination, q=\"*:*\", fl=\"id, body_t, field_i\", rows=1000, sort=\"field_i asc\")");
        paramsLoc.set("qt", "/stream");

        SolrStream solrStream = new SolrStream(url, paramsLoc);
        try {
            List<Tuple> tuples = getTuples(solrStream);
            assertTrue("expected 500 tuples in destination but got " + tuples.size(), tuples.size() == 500);
            // Results are sorted by field_i ascending, so position i must hold document i.
            for (int i = 0; i < 500; i++) {
                Tuple tuple = tuples.get(i);
                long ivalue = tuple.getLong("field_i");
                String body = tuple.getString("body_t");
                assertTrue("field_i at position " + i + " was " + ivalue, ivalue == i);
                assertTrue("body_t at position " + i + " was " + body, ("hello world " + i).equals(body));
            }
        } finally {
            solrStream.close();
        }
    } finally {
        try {
            clientCache.close();
        } finally {
            CollectionAdminRequest.deleteCollection("workQueue").process(cluster.getSolrClient());
            CollectionAdminRequest.deleteCollection("mainCorpus").process(cluster.getSolrClient());
            CollectionAdminRequest.deleteCollection("destination").process(cluster.getSolrClient());
        }
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 67 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testParallelRollupStream.

/**
 * Indexes ten documents spread over three {@code a_s} buckets, runs a {@code rollup} wrapped in
 * a two-worker {@code parallel} stream, and verifies sum/min/max/avg/count for every bucket.
 */
@Test
public void testParallelRollupStream() throws Exception {
    new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1").add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5").add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6").add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7").add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8").add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9").add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("parallel", ParallelStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class);
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        StreamExpression expression = StreamExpressionParser.parse("parallel(" + COLLECTIONORALIAS + ","
            + "rollup("
            + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"a_s,a_i,a_f\", sort=\"a_s asc\", partitionKeys=\"a_s\"),"
            + "over=\"a_s\","
            + "sum(a_i),"
            + "sum(a_f),"
            + "min(a_i),"
            + "min(a_f),"
            + "max(a_i),"
            + "max(a_f),"
            + "avg(a_i),"
            + "avg(a_f),"
            + "count(*)"
            + "),"
            + "workers=\"2\", zkHost=\"" + cluster.getZkServer().getZkAddress() + "\", sort=\"a_s asc\")");
        TupleStream stream = factory.constructStream(expression);
        stream.setStreamContext(streamContext);
        List<Tuple> tuples = getTuples(stream);

        // Use a JUnit assertion rather than the `assert` keyword: the keyword is skipped
        // entirely when the JVM runs without -ea, which would let a wrong bucket count pass.
        assertTrue("expected 3 buckets but got " + tuples.size(), tuples.size() == 3);

        // Buckets arrive ordered by a_s ascending.
        //                                  bucket    sum_i  sum_f  min_i min_f max_i  max_f  avg_i  avg_f  count
        assertRollupBucket(tuples.get(0), "hello0", 17.0D, 18.0D, 0.0D, 1.0D, 14.0D, 10.0D, 4.25D, 4.5D, 4.0D);
        assertRollupBucket(tuples.get(1), "hello3", 38.0D, 26.0D, 3.0D, 3.0D, 13.0D, 9.0D, 9.5D, 6.5D, 4.0D);
        assertRollupBucket(tuples.get(2), "hello4", 15.0D, 11.0D, 4.0D, 4.0D, 11.0D, 7.0D, 7.5D, 5.5D, 2.0D);
    } finally {
        solrClientCache.close();
    }
}

/** Checks the bucket name and all nine rollup metrics of one rolled-up tuple. */
private static void assertRollupBucket(Tuple tuple, String bucket, double sumI, double sumF, double minI, double minF, double maxI, double maxF, double avgI, double avgF, double count) {
    String actualBucket = tuple.getString("a_s");
    assertTrue("expected bucket " + bucket + " but was " + actualBucket, bucket.equals(actualBucket));
    assertRollupMetric(tuple, bucket, "sum(a_i)", sumI);
    assertRollupMetric(tuple, bucket, "sum(a_f)", sumF);
    assertRollupMetric(tuple, bucket, "min(a_i)", minI);
    assertRollupMetric(tuple, bucket, "min(a_f)", minF);
    assertRollupMetric(tuple, bucket, "max(a_i)", maxI);
    assertRollupMetric(tuple, bucket, "max(a_f)", maxF);
    assertRollupMetric(tuple, bucket, "avg(a_i)", avgI);
    assertRollupMetric(tuple, bucket, "avg(a_f)", avgF);
    assertRollupMetric(tuple, bucket, "count(*)", count);
}

/** Checks one metric of a rolled-up tuple for exact equality, naming the bucket and metric on failure. */
private static void assertRollupMetric(Tuple tuple, String bucket, String metric, double expected) {
    Double actual = tuple.getDouble(metric);
    assertTrue(bucket + " " + metric + ": expected " + expected + " but was " + actual, actual != null && actual.doubleValue() == expected);
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) MinMetric(org.apache.solr.client.solrj.io.stream.metrics.MinMetric) StreamExpression(org.apache.solr.client.solrj.io.stream.expr.StreamExpression) MeanMetric(org.apache.solr.client.solrj.io.stream.metrics.MeanMetric) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 68 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class SelectWithEvaluatorsTest method testSelectWithEvaluatorsStream.

/**
 * Indexes a single document and checks that a {@code select} stream can project {@code id}
 * together with a computed {@code add(b_i,c_d)} column named {@code result}.
 */
@Test
public void testSelectWithEvaluatorsStream() throws Exception {
    UpdateRequest seed = new UpdateRequest();
    seed.add(id, "1", "a_s", "foo", "b_i", "1", "c_d", "3.3", "d_b", "true");
    seed.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamContext ctx = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    ctx.setSolrClientCache(cache);

    String zkAddress = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory()
        .withCollectionZkHost("collection1", zkAddress)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("select", SelectStream.class)
        .withFunctionName("add", AddEvaluator.class)
        .withFunctionName("if", IfThenElseEvaluator.class)
        .withFunctionName("gt", GreaterThanEvaluator.class);

    try {
        // Basic test: keep id, add b_i + c_d as "result", drop every other field.
        String selectExpr = "select("
            + "id,"
            + "add(b_i,c_d) as result,"
            + "search(collection1, q=*:*, fl=\"id,a_s,b_i,c_d,d_b\", sort=\"id asc\")"
            + ")";
        TupleStream selectStream = streamFactory.constructStream(selectExpr);
        selectStream.setStreamContext(ctx);
        List<Tuple> results = getTuples(selectStream);

        assertFields(results, "id", "result");
        assertNotFields(results, "a_s", "b_i", "c_d", "d_b");
        assertEquals(1, results.size());
        assertDouble(results.get(0), "result", 4.3);
        assertEquals(4.3, results.get(0).get("result"));
    } finally {
        cache.close();
    }
}
Also used : IfThenElseEvaluator(org.apache.solr.client.solrj.io.eval.IfThenElseEvaluator) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 69 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class SolrTable method handleGroupByFacet.

/**
 * Builds the stream that answers a GROUP BY query through a {@link FacetStream}.
 *
 * <p>The facet stream is asked for 25% more rows than the limit. It is then optionally wrapped
 * in a {@link HavingStream} (when {@code havingPredicate} is given) and a {@link LimitStream}
 * (when {@code lim} is given).
 *
 * @param zkHost          ZooKeeper host passed through to the facet stream
 * @param collection      collection passed through to the facet stream
 * @param fields          field name / type pairs; a metric whose identifier maps to
 *                        {@code Long} is switched to long output
 * @param query           value of the {@code q} parameter
 * @param orders          requested sort order; when null or empty every bucket is sorted by
 *                        {@code "index"} ascending
 * @param bucketFields    fields to bucket on
 * @param metricPairs     metrics to compute; when none result, a single count metric is used
 * @param lim             row limit as a decimal string, or null for the default of 1000
 *                        (in which case no {@link LimitStream} is applied)
 * @param havingPredicate HAVING expression to parse into a boolean evaluator, or null
 * @return the assembled tuple stream
 * @throws IOException if building one of the streams or the evaluator fails
 * @throws NumberFormatException if {@code lim} is non-null and not a parsable integer
 */
private TupleStream handleGroupByFacet(String zkHost, String collection, final List<Map.Entry<String, Class>> fields, final String query, final List<Pair<String, String>> orders, final List<String> bucketFields, final List<Pair<String, String>> metricPairs, final String lim, final String havingPredicate) throws IOException {
    // Field name -> declared type, used below to decide which metrics should emit longs.
    Map<String, Class> fmap = new HashMap<>();
    for (Map.Entry<String, Class> f : fields) {
        fmap.put(f.getKey(), f.getValue());
    }

    ModifiableSolrParams solrParams = new ModifiableSolrParams();
    solrParams.add(CommonParams.Q, query);

    Bucket[] buckets = buildBuckets(bucketFields, fields);
    Metric[] metrics = buildMetrics(metricPairs, true).toArray(new Metric[0]);
    if (metrics.length == 0) {
        // No metric was requested: fall back to a plain count.
        metrics = new Metric[] { new CountMetric() };
    } else {
        for (Metric metric : metrics) {
            Class c = fmap.get(metric.getIdentifier());
            if (Long.class.equals(c)) {
                metric.outputLong = true;
            }
        }
    }

    int limit = lim != null ? Integer.parseInt(lim) : 1000;

    FieldComparator[] sorts;
    if (orders == null || orders.isEmpty()) {
        // No explicit ordering: one "index" ascending comparator per bucket.
        sorts = new FieldComparator[buckets.length];
        for (int i = 0; i < sorts.length; i++) {
            sorts[i] = new FieldComparator("index", ComparatorOrder.ASCENDING);
        }
    } else {
        sorts = getComps(orders);
    }

    // Request 25% more rows than the limit from the facet stream.
    int overfetch = (int) (limit * 1.25);
    TupleStream tupleStream = new FacetStream(zkHost, collection, solrParams, buckets, metrics, sorts, overfetch);

    if (havingPredicate != null) {
        // The factory is only needed to parse the HAVING clause, so build it only here.
        StreamFactory factory = new StreamFactory()
            .withFunctionName("search", CloudSolrStream.class)
            .withFunctionName("parallel", ParallelStream.class)
            .withFunctionName("rollup", RollupStream.class)
            .withFunctionName("sum", SumMetric.class)
            .withFunctionName("min", MinMetric.class)
            .withFunctionName("max", MaxMetric.class)
            .withFunctionName("avg", MeanMetric.class)
            .withFunctionName("count", CountMetric.class)
            .withFunctionName("and", AndEvaluator.class)
            .withFunctionName("or", OrEvaluator.class)
            .withFunctionName("not", NotEvaluator.class)
            .withFunctionName("eq", EqualsEvaluator.class)
            .withFunctionName("val", RawValueEvaluator.class)
            .withFunctionName("gt", GreaterThanEvaluator.class)
            .withFunctionName("lt", LessThanEvaluator.class)
            .withFunctionName("lteq", LessThanEqualToEvaluator.class)
            .withFunctionName("gteq", GreaterThanEqualToEvaluator.class);
        BooleanEvaluator booleanOperation = (BooleanEvaluator) factory.constructEvaluator(StreamExpressionParser.parse(havingPredicate));
        tupleStream = new HavingStream(tupleStream, booleanOperation);
    }

    if (lim != null) {
        tupleStream = new LimitStream(tupleStream, limit);
    }
    return tupleStream;
}
Also used : OrEvaluator(org.apache.solr.client.solrj.io.eval.OrEvaluator) EqualsEvaluator(org.apache.solr.client.solrj.io.eval.EqualsEvaluator) GreaterThanEvaluator(org.apache.solr.client.solrj.io.eval.GreaterThanEvaluator) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) BooleanEvaluator(org.apache.solr.client.solrj.io.eval.BooleanEvaluator) LessThanEqualToEvaluator(org.apache.solr.client.solrj.io.eval.LessThanEqualToEvaluator) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) FieldComparator(org.apache.solr.client.solrj.io.comp.FieldComparator) MultipleFieldComparator(org.apache.solr.client.solrj.io.comp.MultipleFieldComparator)

Aggregations

StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)69 Tuple (org.apache.solr.client.solrj.io.Tuple)65 UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)65 Test (org.junit.Test)64 SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)61 StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression)37 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)14 MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric)10 FieldComparator (org.apache.solr.client.solrj.io.comp.FieldComparator)9 MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric)9 IOException (java.io.IOException)8 HashMap (java.util.HashMap)7 StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 Connection (java.sql.Connection)4 Statement (java.sql.Statement)4 List (java.util.List)4 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)4 AndEvaluator (org.apache.solr.client.solrj.io.eval.AndEvaluator)3