Search in sources :

Example 31 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testParallelReducerStream.

/**
 * Indexes ten documents spread over three {@code a_s} values, then runs a
 * {@code parallel(reduce(search(...), group(...)))} expression twice (ascending and
 * descending) and verifies the grouped document ids of each of the three result tuples.
 *
 * @throws Exception if indexing, stream construction or stream execution fails
 */
@Test
public void testParallelReducerStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("group", GroupOperation.class)
        .withFunctionName("reduce", ReducerStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    // Create the client cache immediately before the try so that nothing can throw
    // between its creation and the finally block that closes it.
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        // Ascending: groups ordered by a_s asc, members ordered by a_i asc.
        ParallelStream pstream = (ParallelStream) streamFactory.constructStream(
            "parallel(" + COLLECTIONORALIAS + ", "
                + "reduce("
                + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"id,a_s,a_i,a_f\", sort=\"a_s asc,a_f asc\", partitionKeys=\"a_s\"), "
                + "by=\"a_s\","
                + "group(sort=\"a_i asc\", n=\"5\")), "
                + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_s asc\")");
        pstream.setStreamContext(streamContext);
        List<Tuple> tuples = getTuples(pstream);
        // assertEquals instead of the `assert` keyword: the keyword is a no-op without -ea.
        assertEquals(3, tuples.size());
        Tuple t0 = tuples.get(0);
        List<Map> maps0 = t0.getMaps("group");
        assertMaps(maps0, 0, 1, 2, 9);
        Tuple t1 = tuples.get(1);
        List<Map> maps1 = t1.getMaps("group");
        assertMaps(maps1, 3, 5, 7, 8);
        Tuple t2 = tuples.get(2);
        List<Map> maps2 = t2.getMaps("group");
        assertMaps(maps2, 4, 6);

        // Descending: groups ordered by a_s desc, members ordered by a_i desc.
        pstream = (ParallelStream) streamFactory.constructStream(
            "parallel(" + COLLECTIONORALIAS + ", "
                + "reduce("
                + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"id,a_s,a_i,a_f\", sort=\"a_s desc,a_f asc\", partitionKeys=\"a_s\"), "
                + "by=\"a_s\", "
                + "group(sort=\"a_i desc\", n=\"5\")),"
                + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_s desc\")");
        pstream.setStreamContext(streamContext);
        tuples = getTuples(pstream);
        assertEquals(3, tuples.size());
        t0 = tuples.get(0);
        maps0 = t0.getMaps("group");
        assertMaps(maps0, 6, 4);
        t1 = tuples.get(1);
        maps1 = t1.getMaps("group");
        assertMaps(maps1, 8, 7, 5, 3);
        t2 = tuples.get(2);
        maps2 = t2.getMaps("group");
        assertMaps(maps2, 9, 2, 1, 0);
    } finally {
        solrClientCache.close();
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) HashMap(java.util.HashMap) Map(java.util.Map) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 32 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class JDBCStreamTest method testJDBCSolrMerge.

/**
 * Loads five rows into the in-memory HSQLDB {@code COUNTRIES} table and two documents
 * into Solr, merges a {@link JDBCStream} (wrapped in a {@link SelectStream}) with a Solr
 * {@code search(...)} stream, and verifies the seven merged tuples arrive ordered by
 * {@code code_s}.
 *
 * @throws Exception if the database load, Solr indexing or stream execution fails
 */
@Test
public void testJDBCSolrMerge() throws Exception {
    // Load Database Data
    try (Connection connection = DriverManager.getConnection("jdbc:hsqldb:mem:.");
        Statement statement = connection.createStatement()) {
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('US', 'United States')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NL', 'Netherlands')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NP', 'Nepal')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NO', 'Norway')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('AL', 'Algeria')");
    }

    // Load Solr
    new UpdateRequest()
        .add(id, "0", "code_s", "GB", "name_s", "Great Britian")
        .add(id, "1", "code_s", "CA", "name_s", "Canada")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class);

    // Field mapping handed to SelectStream; presumably renames the JDBC columns to the
    // Solr field names so both inputs can be compared on code_s - confirm against SelectStream.
    // A plain HashMap avoids the anonymous subclass created by double-brace initialization.
    HashMap<String, String> columnToField = new HashMap<>();
    columnToField.put("CODE", "code_s");
    columnToField.put("COUNTRY_NAME", "name_s");

    // Create the client cache last so that a failure in the setup above cannot leak it.
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        // Simple 1
        TupleStream jdbcStream = new JDBCStream("jdbc:hsqldb:mem:.", "select CODE,COUNTRY_NAME from COUNTRIES order by CODE", new FieldComparator("CODE", ComparatorOrder.ASCENDING));
        TupleStream selectStream = new SelectStream(jdbcStream, columnToField);
        TupleStream searchStream = factory.constructStream("search(" + COLLECTIONORALIAS + ", fl=\"code_s,name_s\",q=\"*:*\",sort=\"code_s asc\")");
        TupleStream mergeStream = new MergeStream(new FieldComparator("code_s", ComparatorOrder.ASCENDING), new TupleStream[] { selectStream, searchStream });
        mergeStream.setStreamContext(streamContext);

        List<Tuple> tuples = getTuples(mergeStream);
        assertEquals(7, tuples.size());
        assertOrderOf(tuples, "code_s", "AL", "CA", "GB", "NL", "NO", "NP", "US");
        assertOrderOf(tuples, "name_s", "Algeria", "Canada", "Great Britian", "Netherlands", "Norway", "Nepal", "United States");
    } finally {
        solrClientCache.close();
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) Statement(java.sql.Statement) Connection(java.sql.Connection) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) FieldComparator(org.apache.solr.client.solrj.io.comp.FieldComparator) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 33 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testSortStream.

/**
 * Indexes six documents and verifies that a {@code sort(search(...), by=...)} expression
 * returns them in the expected id order for an ascending sort, a descending sort and a
 * two-field sort.
 *
 * @throws Exception if indexing, stream construction or stream execution fails
 */
@Test
public void testSortStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        StreamFactory factory = new StreamFactory()
            .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
            .withFunctionName("search", CloudSolrStream.class)
            .withFunctionName("sort", SortStream.class);

        // Basic test
        stream = factory.constructStream("sort(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")");
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        // assertEquals instead of the `assert` keyword: the keyword is a no-op without -ea.
        assertEquals(6, tuples.size());
        assertOrder(tuples, 0, 1, 5, 2, 3, 4);

        // Basic test desc
        stream = factory.constructStream("sort(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i desc\")");
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        assertEquals(6, tuples.size());
        assertOrder(tuples, 4, 3, 2, 1, 5, 0);

        // Basic w/multi comp
        stream = factory.constructStream("sort(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc, a_f desc\")");
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        assertEquals(6, tuples.size());
        assertOrder(tuples, 0, 5, 1, 2, 3, 4);
    } finally {
        solrClientCache.close();
    }
}
Also used : StreamExpression(org.apache.solr.client.solrj.io.stream.expr.StreamExpression) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 34 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testParallelIntersectStream.

/**
 * Indexes eight documents tagged {@code setA}, {@code setB} or {@code setAB} and verifies
 * that a {@code parallel(intersect(search(...), search(...), on="a_i"))} expression run with
 * two workers returns the five expected documents in {@code a_i} order.
 *
 * @throws Exception if indexing, stream construction or stream execution fails
 */
@Test
public void testParallelIntersectStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "setA", "a_i", "0")
        .add(id, "2", "a_s", "setA", "a_i", "1")
        .add(id, "3", "a_s", "setA", "a_i", "2")
        .add(id, "4", "a_s", "setA", "a_i", "3")
        .add(id, "5", "a_s", "setB", "a_i", "2")
        .add(id, "6", "a_s", "setB", "a_i", "3")
        .add(id, "7", "a_s", "setAB", "a_i", "0")
        .add(id, "8", "a_s", "setAB", "a_i", "6")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    // Query the same collection/alias the documents were committed to rather than a
    // hard-coded name, so the test keeps working if COLLECTIONORALIAS changes.
    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("intersect", IntersectStream.class)
        .withFunctionName("parallel", ParallelStream.class);

    // basic
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        final TupleStream stream = streamFactory.constructStream(
            "parallel(" + COLLECTIONORALIAS + ", "
                + "intersect("
                + "search(" + COLLECTIONORALIAS + ", q=a_s:(setA || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc, a_s asc\", partitionKeys=\"a_i\"),"
                + "search(" + COLLECTIONORALIAS + ", q=a_s:(setB || setAB), fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\"),"
                + "on=\"a_i\"),"
                + "workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i asc\")");
        stream.setStreamContext(streamContext);
        final List<Tuple> tuples = getTuples(stream);
        // assertEquals instead of the `assert` keyword: the keyword is a no-op without -ea.
        assertEquals(5, tuples.size());
        assertOrder(tuples, 0, 7, 3, 4, 8);
    } finally {
        solrClientCache.close();
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 35 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testExecutorStream.

/**
 * Creates the {@code workQueue}, {@code mainCorpus} and {@code destination} collections,
 * fills {@code workQueue} with 500 {@code update(...)} expressions and {@code mainCorpus}
 * with 500 documents, runs them through an {@code executor(...)} stream, and then verifies
 * that {@code destination} contains all 500 documents with the expected field values.
 * The client cache and the three collections are released in {@code finally} blocks so a
 * failed assertion does not leave them behind.
 *
 * @throws Exception if collection management, indexing or stream execution fails
 */
@Test
public void testExecutorStream() throws Exception {
    CollectionAdminRequest.createCollection("workQueue", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("workQueue", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    CollectionAdminRequest.createCollection("mainCorpus", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("mainCorpus", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    CollectionAdminRequest.createCollection("destination", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("destination", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);

    SolrClientCache clientCache = new SolrClientCache();
    try {
        UpdateRequest workRequest = new UpdateRequest();
        UpdateRequest dataRequest = new UpdateRequest();
        for (int i = 0; i < 500; i++) {
            workRequest.add(id, String.valueOf(i), "expr_s", "update(destination, batchSize=50, search(mainCorpus, q=id:" + i + ", rows=1, sort=\"id asc\", fl=\"id, body_t, field_i\"))");
            dataRequest.add(id, String.valueOf(i), "body_t", "hello world " + i, "field_i", Integer.toString(i));
        }
        workRequest.commit(cluster.getSolrClient(), "workQueue");
        dataRequest.commit(cluster.getSolrClient(), "mainCorpus");

        String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/destination";
        String zkHost = cluster.getZkServer().getZkAddress();
        StreamFactory factory = new StreamFactory()
            .withCollectionZkHost("workQueue", zkHost)
            .withCollectionZkHost("mainCorpus", zkHost)
            .withCollectionZkHost("destination", zkHost)
            .withFunctionName("search", CloudSolrStream.class)
            .withFunctionName("executor", ExecutorStream.class)
            .withFunctionName("update", UpdateStream.class);

        String executorExpression = "executor(threads=3, search(workQueue, q=\"*:*\", fl=\"id, expr_s\", rows=1000, sort=\"id desc\"))";
        TupleStream executorStream = factory.constructStream(executorExpression);
        StreamContext context = new StreamContext();
        context.setSolrClientCache(clientCache);
        executorStream.setStreamContext(context);
        getTuples(executorStream);

        //Destination collection should now contain all the records in the main corpus.
        cluster.getSolrClient().commit("destination");
        ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
        paramsLoc.set("expr", "search(destination, q=\"*:*\", fl=\"id, body_t, field_i\", rows=1000, sort=\"field_i asc\")");
        paramsLoc.set("qt", "/stream");

        SolrStream solrStream = new SolrStream(url, paramsLoc);
        try {
            List<Tuple> tuples = getTuples(solrStream);
            assertEquals(500, tuples.size());
            for (int i = 0; i < 500; i++) {
                Tuple tuple = tuples.get(i);
                long ivalue = tuple.getLong("field_i");
                String body = tuple.getString("body_t");
                // assertEquals reports both values on failure and tolerates a null body.
                assertEquals(i, ivalue);
                assertEquals("hello world " + i, body);
            }
        } finally {
            solrStream.close();
        }
    } finally {
        try {
            clientCache.close();
        } finally {
            // Always drop the per-test collections so a failure does not affect later tests.
            CollectionAdminRequest.deleteCollection("workQueue").process(cluster.getSolrClient());
            CollectionAdminRequest.deleteCollection("mainCorpus").process(cluster.getSolrClient());
            CollectionAdminRequest.deleteCollection("destination").process(cluster.getSolrClient());
        }
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Aggregations

StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory) 69
Tuple (org.apache.solr.client.solrj.io.Tuple) 65
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest) 65
Test (org.junit.Test) 64
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache) 61
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression) 37
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams) 14
MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric) 10
FieldComparator (org.apache.solr.client.solrj.io.comp.FieldComparator) 9
MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric) 9
IOException (java.io.IOException) 8
HashMap (java.util.HashMap) 7
StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext) 6
ArrayList (java.util.ArrayList) 5
Map (java.util.Map) 5
Connection (java.sql.Connection) 4
Statement (java.sql.Statement) 4
List (java.util.List) 4
JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner) 4
AndEvaluator (org.apache.solr.client.solrj.io.eval.AndEvaluator) 3