Example 6 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

From the class StreamExpressionTest, method testTerminatingDaemonStream.

@Test
public void testTerminatingDaemonStream() throws Exception {
    Assume.assumeTrue(!useAlias);
    new UpdateRequest()
        .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("topic", TopicStream.class)
        .withFunctionName("daemon", DaemonStream.class);
    StreamExpression expression;
    DaemonStream daemonStream;
    SolrClientCache cache = new SolrClientCache();
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache);
    expression = StreamExpressionParser.parse("daemon(topic(" + COLLECTIONORALIAS + "," + COLLECTIONORALIAS + ", q=\"a_s:hello\", initialCheckpoint=0, id=\"topic1\", rows=2, fl=\"id\"" + "), id=test, runInterval=1000, terminate=true, queueSize=50)");
    daemonStream = (DaemonStream) factory.constructStream(expression);
    daemonStream.setStreamContext(context);
    List<Tuple> tuples = getTuples(daemonStream);
    assertEquals(10, tuples.size());
    cache.close();
}
Also used: StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression), UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest), StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory), SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache), Tuple (org.apache.solr.client.solrj.io.Tuple), Test (org.junit.Test)
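The tests in these examples call a getTuples(...) helper from StreamExpressionTest that is not included in the snippets. A minimal sketch of such a helper, assuming only the standard TupleStream open/read/close contract (the real helper may differ in its details):

protected List<Tuple> getTuples(TupleStream stream) throws IOException {
    List<Tuple> tuples = new ArrayList<>();
    try {
        stream.open();
        // Read until the stream emits its EOF marker tuple.
        for (Tuple t = stream.read(); !t.EOF; t = stream.read()) {
            tuples.add(t);
        }
    } finally {
        stream.close();
    }
    return tuples;
}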

Example 7 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

From the class StreamExpressionTest, method testParallelNullStream.

@Test
public void testParallelNullStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1")
        .add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("null", NullStream.class)
        .withFunctionName("parallel", ParallelStream.class);
    try {
        // Basic test
        stream = factory.constructStream("parallel(" + COLLECTIONORALIAS + ", workers=2, sort=\"nullCount desc\", null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\", partitionKeys=id), by=\"a_i asc\"))");
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        assertEquals(2, tuples.size());
        long nullCount = 0;
        for (Tuple t : tuples) {
            nullCount += t.getLong("nullCount");
        }
        assertEquals(6L, nullCount);
    } finally {
        solrClientCache.close();
    }
}
Also used: StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression), UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest), StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory), SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache), Tuple (org.apache.solr.client.solrj.io.Tuple), Test (org.junit.Test)
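For contrast, a hypothetical single-worker variant (not part of the original test) would drop the parallel wrapper; the null stream would then be expected to return a single tuple whose nullCount covers all six indexed documents:

stream = factory.constructStream("null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")");
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assertEquals(1, tuples.size());
assertEquals(6L, tuples.get(0).getLong("nullCount").longValue());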

Example 8 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

From the class StreamExpressionTest, method testParallelRankStream.

@Test
public void testParallelRankStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "5", "a_s", "hello1", "a_i", "5", "a_f", "1")
        .add(id, "6", "a_s", "hello1", "a_i", "6", "a_f", "1")
        .add(id, "7", "a_s", "hello1", "a_i", "7", "a_f", "1")
        .add(id, "8", "a_s", "hello1", "a_i", "8", "a_f", "1")
        .add(id, "9", "a_s", "hello1", "a_i", "9", "a_f", "1")
        .add(id, "10", "a_s", "hello1", "a_i", "10", "a_f", "1")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory streamFactory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("unique", UniqueStream.class)
        .withFunctionName("top", RankStream.class)
        .withFunctionName("group", ReducerStream.class)
        .withFunctionName("parallel", ParallelStream.class);
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        ParallelStream pstream = (ParallelStream) streamFactory.constructStream("parallel(" + COLLECTIONORALIAS + ", " + "top(" + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"id,a_s,a_i\", sort=\"a_i asc\", partitionKeys=\"a_i\"), " + "n=\"11\", " + "sort=\"a_i desc\"), workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"a_i desc\")");
        pstream.setStreamContext(streamContext);
        List<Tuple> tuples = getTuples(pstream);
        assertEquals(10, tuples.size());
        assertOrder(tuples, 10, 9, 8, 7, 6, 5, 4, 3, 2, 0);
    } finally {
        solrClientCache.close();
    }
}
Also used: UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest), StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory), SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache), Tuple (org.apache.solr.client.solrj.io.Tuple), Test (org.junit.Test)
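assertOrder(...) is another StreamExpressionTest helper that is not shown in these snippets. A rough sketch, under the assumption that it checks the id field of each tuple against the expected sequence (the actual helper may differ):

protected void assertOrder(List<Tuple> tuples, int... ids) {
    assertEquals(ids.length, tuples.size());
    for (int i = 0; i < ids.length; i++) {
        // Compare the id field of each tuple, in order, against the expected ids.
        assertEquals("unexpected id at position " + i, (long) ids[i], tuples.get(i).getLong("id").longValue());
    }
}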

Example 9 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

From the class StreamExpressionTest, method testFetchStream.

@Test
public void testFetchStream() throws Exception {
    //TODO share in @Before ; close in @After ?
    SolrClientCache solrClientCache = new SolrClientCache();
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1", "subject", "blah blah blah 0")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2", "subject", "blah blah blah 2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "subject", "blah blah blah 3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "subject", "blah blah blah 4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5", "subject", "blah blah blah 1")
        .add(id, "5", "a_s", "hello3", "a_i", "5", "a_f", "6", "subject", "blah blah blah 5")
        .add(id, "6", "a_s", "hello4", "a_i", "6", "a_f", "7", "subject", "blah blah blah 6")
        .add(id, "7", "a_s", "hello3", "a_i", "7", "a_f", "8", "subject", "blah blah blah 7")
        .add(id, "8", "a_s", "hello3", "a_i", "8", "a_f", "9", "subject", "blah blah blah 8")
        .add(id, "9", "a_s", "hello0", "a_i", "9", "a_f", "10", "subject", "blah blah blah 9")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    TupleStream stream;
    List<Tuple> tuples;
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("fetch", FetchStream.class);
    stream = factory.constructStream("fetch(" + COLLECTIONORALIAS + ",  search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), on=\"id=a_i\", batchSize=\"2\", fl=\"subject\")");
    StreamContext context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assertEquals(10, tuples.size());
    assertEquals("blah blah blah 0", tuples.get(0).getString("subject"));
    assertEquals("blah blah blah 2", tuples.get(1).getString("subject"));
    assertEquals("blah blah blah 3", tuples.get(2).getString("subject"));
    assertEquals("blah blah blah 4", tuples.get(3).getString("subject"));
    assertEquals("blah blah blah 1", tuples.get(4).getString("subject"));
    assertEquals("blah blah blah 5", tuples.get(5).getString("subject"));
    assertEquals("blah blah blah 6", tuples.get(6).getString("subject"));
    assertEquals("blah blah blah 7", tuples.get(7).getString("subject"));
    assertEquals("blah blah blah 8", tuples.get(8).getString("subject"));
    assertEquals("blah blah blah 9", tuples.get(9).getString("subject"));
    //Change the batch size
    stream = factory.constructStream("fetch(" + COLLECTIONORALIAS + ",  search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), on=\"id=a_i\", batchSize=\"3\", fl=\"subject\")");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assertEquals(10, tuples.size());
    assertEquals("blah blah blah 0", tuples.get(0).getString("subject"));
    assertEquals("blah blah blah 2", tuples.get(1).getString("subject"));
    assertEquals("blah blah blah 3", tuples.get(2).getString("subject"));
    assertEquals("blah blah blah 4", tuples.get(3).getString("subject"));
    assertEquals("blah blah blah 1", tuples.get(4).getString("subject"));
    assertEquals("blah blah blah 5", tuples.get(5).getString("subject"));
    assertEquals("blah blah blah 6", tuples.get(6).getString("subject"));
    assertEquals("blah blah blah 7", tuples.get(7).getString("subject"));
    assertEquals("blah blah blah 8", tuples.get(8).getString("subject"));
    assertEquals("blah blah blah 9", tuples.get(9).getString("subject"));
    // SOLR-10404 test that "hello 99" as a value gets escaped
    new UpdateRequest().add(id, "99", "a1_s", "hello 99", "a2_s", "hello 99", "subject", "blah blah blah 99").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    stream = factory.constructStream("fetch(" + COLLECTIONORALIAS + ",  search(" + COLLECTIONORALIAS + ", q=" + id + ":99, fl=\"id,a1_s\", sort=\"id asc\"), on=\"a1_s=a2_s\", fl=\"subject\")");
    context = new StreamContext();
    context.setSolrClientCache(solrClientCache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assertEquals(1, tuples.size());
    assertEquals("blah blah blah 99", tuples.get(0).getString("subject"));
    solrClientCache.close();
}
Also used: UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest), StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory), SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache), Tuple (org.apache.solr.client.solrj.io.Tuple), Test (org.junit.Test)
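The TODO at the top of testFetchStream suggests creating the SolrClientCache in @Before and closing it in @After. A minimal sketch of that idea, with hypothetical field and method names (org.junit.Before and org.junit.After assumed to be imported):

private SolrClientCache sharedClientCache;

@Before
public void setUpClientCache() {
    // Create one cache per test and share it across the stream contexts in that test.
    sharedClientCache = new SolrClientCache();
}

@After
public void tearDownClientCache() {
    sharedClientCache.close();
}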

Example 10 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

From the class StreamExpressionTest, method testParallelDaemonCommitStream.

@Test
public void testParallelDaemonCommitStream() throws Exception {
    CollectionAdminRequest.createCollection("parallelDestinationCollection1", "conf", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection1", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7")
        .add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777")
        .add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777")
        .commit(cluster.getSolrClient(), "collection1");
    StreamExpression expression;
    TupleStream stream;
    Tuple t;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    String zkHost = cluster.getZkServer().getZkAddress();
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", zkHost)
        .withCollectionZkHost("parallelDestinationCollection1", zkHost)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("update", UpdateStream.class)
        .withFunctionName("commit", CommitStream.class)
        .withFunctionName("parallel", ParallelStream.class)
        .withFunctionName("daemon", DaemonStream.class);
    try {
        // Copy all docs to parallelDestinationCollection1 via daemon-wrapped update and commit streams.
        String updateExpression = "daemon(commit(parallelDestinationCollection1, batchSize=0, zkHost=\"" + cluster.getZkServer().getZkAddress() + "\", update(parallelDestinationCollection1, batchSize=2, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\"))), runInterval=\"1000\", id=\"test\")";
        TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")");
        parallelUpdateStream.setStreamContext(streamContext);
        List<Tuple> tuples = getTuples(parallelUpdateStream);
        assertEquals(2, tuples.size());
        // Poll each daemon until it reports at least one completed iteration,
        // giving the daemon updates enough time to run.
        ModifiableSolrParams sParams = new ModifiableSolrParams(StreamingTest.mapParams(CommonParams.QT, "/stream", "action", "list"));
        int workersComplete = 0;
        for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
            int iterations = 0;
            INNER: while (iterations == 0) {
                SolrStream solrStream = new SolrStream(jetty.getBaseUrl().toString() + "/collection1", sParams);
                solrStream.setStreamContext(streamContext);
                solrStream.open();
                Tuple tupleResponse = solrStream.read();
                if (tupleResponse.EOF) {
                    solrStream.close();
                    break INNER;
                } else {
                    long l = tupleResponse.getLong("iterations");
                    if (l > 0) {
                        ++workersComplete;
                    } else {
                        try {
                            Thread.sleep(1000);
                        } catch (Exception e) {
                            // Interrupted while waiting; just poll again.
                        }
                    }
                    iterations = (int) l;
                    solrStream.close();
                }
            }
        }
        assertEquals(cluster.getJettySolrRunners().size(), workersComplete);
        // Now stop the daemons.
        sParams = new ModifiableSolrParams();
        sParams.set(CommonParams.QT, "/stream");
        sParams.set("action", "stop");
        sParams.set("id", "test");
        for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
            SolrStream solrStream = new SolrStream(jetty.getBaseUrl() + "/collection1", sParams);
            solrStream.setStreamContext(streamContext);
            solrStream.open();
            Tuple tupleResponse = solrStream.read();
            solrStream.close();
        }
        sParams = new ModifiableSolrParams();
        sParams.set(CommonParams.QT, "/stream");
        sParams.set("action", "list");
        workersComplete = 0;
        for (JettySolrRunner jetty : cluster.getJettySolrRunners()) {
            long stopTime = 0;
            INNER: while (stopTime == 0) {
                SolrStream solrStream = new SolrStream(jetty.getBaseUrl() + "/collection1", sParams);
                solrStream.setStreamContext(streamContext);
                solrStream.open();
                Tuple tupleResponse = solrStream.read();
                if (tupleResponse.EOF) {
                    solrStream.close();
                    break INNER;
                } else {
                    stopTime = tupleResponse.getLong("stopTime");
                    if (stopTime > 0) {
                        ++workersComplete;
                    } else {
                        try {
                            Thread.sleep(1000);
                        } catch (Exception e) {
                            // Interrupted while waiting; just poll again.
                        }
                    }
                    solrStream.close();
                }
            }
        }
        assertEquals(cluster.getJettySolrRunners().size(), workersComplete);
        // Ensure that parallelDestinationCollection1 actually has the new docs.
        expression = StreamExpressionParser.parse("search(parallelDestinationCollection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")");
        stream = new CloudSolrStream(expression, factory);
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        assertEquals(5, tuples.size());
        Tuple tuple = tuples.get(0);
        assertEquals(0L, tuple.getLong("id").longValue());
        assertEquals("hello0", tuple.get("a_s"));
        assertEquals(0L, tuple.getLong("a_i").longValue());
        assertEquals(0.0, tuple.getDouble("a_f"), 0.0);
        assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb");
        assertList(tuple.getLongs("i_multi"), 4L, 7L);
        tuple = tuples.get(1);
        assertEquals(1L, tuple.getLong("id").longValue());
        assertEquals("hello1", tuple.get("a_s"));
        assertEquals(1L, tuple.getLong("a_i").longValue());
        assertEquals(1.0, tuple.getDouble("a_f"), 0.0);
        assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4");
        assertList(tuple.getLongs("i_multi"), 44444L, 77777L);
        tuple = tuples.get(2);
        assertEquals(2L, tuple.getLong("id").longValue());
        assertEquals("hello2", tuple.get("a_s"));
        assertEquals(2L, tuple.getLong("a_i").longValue());
        assertEquals(0.0, tuple.getDouble("a_f"), 0.0);
        assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1");
        assertList(tuple.getLongs("i_multi"), 44L, 77L);
        tuple = tuples.get(3);
        assertEquals(3L, tuple.getLong("id").longValue());
        assertEquals("hello3", tuple.get("a_s"));
        assertEquals(3L, tuple.getLong("a_i").longValue());
        assertEquals(3.0, tuple.getDouble("a_f"), 0.0);
        assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2");
        assertList(tuple.getLongs("i_multi"), 444L, 777L);
        tuple = tuples.get(4);
        assertEquals(4L, tuple.getLong("id").longValue());
        assertEquals("hello4", tuple.get("a_s"));
        assertEquals(4L, tuple.getLong("a_i").longValue());
        assertEquals(4.0, tuple.getDouble("a_f"), 0.0);
        assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3");
        assertList(tuple.getLongs("i_multi"), 4444L, 7777L);
    } finally {
        CollectionAdminRequest.deleteCollection("parallelDestinationCollection1").process(cluster.getSolrClient());
        solrClientCache.close();
    }
}
Also used: UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest), JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner), ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams), IOException (java.io.IOException), StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression), StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory), SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache), Tuple (org.apache.solr.client.solrj.io.Tuple), Test (org.junit.Test)
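assertList(...) used in this example is also a StreamExpressionTest helper not shown in the snippet. A sketch under the assumption that it compares a multi-valued field against the expected values in order (java.util.Arrays assumed imported; the real implementation may differ):

@SafeVarargs
protected final <T> void assertList(List<T> actual, T... expected) {
    // Compare the multi-valued field contents, including ordering.
    assertEquals(Arrays.asList(expected), actual);
}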

Aggregations

StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory): 69 usages
Tuple (org.apache.solr.client.solrj.io.Tuple): 65 usages
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest): 65 usages
Test (org.junit.Test): 64 usages
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache): 61 usages
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression): 37 usages
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 14 usages
MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric): 10 usages
FieldComparator (org.apache.solr.client.solrj.io.comp.FieldComparator): 9 usages
MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric): 9 usages
IOException (java.io.IOException): 8 usages
HashMap (java.util.HashMap): 7 usages
StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext): 6 usages
ArrayList (java.util.ArrayList): 5 usages
Map (java.util.Map): 5 usages
Connection (java.sql.Connection): 4 usages
Statement (java.sql.Statement): 4 usages
List (java.util.List): 4 usages
JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner): 4 usages
AndEvaluator (org.apache.solr.client.solrj.io.eval.AndEvaluator): 3 usages