Example 56 with SolrClientCache

Use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.

From class GraphExpressionTest, method testScoreNodesFacetStream.

@Test
public void testScoreNodesFacetStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "basket_s", "basket1", "product_ss", "product1", "product_ss", "product3", "product_ss", "product5", "price_f", "1")
        .add(id, "3", "basket_s", "basket2", "product_ss", "product1", "product_ss", "product6", "product_ss", "product7", "price_f", "1")
        .add(id, "6", "basket_s", "basket3", "product_ss", "product4", "product_ss", "product3", "product_ss", "product1", "price_f", "1")
        .add(id, "9", "basket_s", "basket4", "product_ss", "product4", "product_ss", "product3", "product_ss", "product1", "price_f", "1")
        .commit(cluster.getSolrClient(), COLLECTION);
    List<Tuple> tuples = null;
    TupleStream stream = null;
    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    context.setSolrClientCache(cache);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withDefaultZkHost(cluster.getZkServer().getZkAddress())
        .withFunctionName("gatherNodes", GatherNodesStream.class)
        .withFunctionName("scoreNodes", ScoreNodesStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("facet", FacetStream.class)
        .withFunctionName("sort", SortStream.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class);
    String expr = "sort(by=\"nodeScore desc\",scoreNodes(facet(collection1, q=\"product_ss:product3\", buckets=\"product_ss\", bucketSorts=\"count(*) desc\", bucketSizeLimit=100, count(*))))";
    stream = factory.constructStream(expr);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    Tuple tuple = tuples.get(0);
    assert (tuple.getString("node").equals("product3"));
    assert (tuple.getLong("docFreq") == 3);
    assert (tuple.getLong("count(*)") == 3);
    Tuple tuple0 = tuples.get(1);
    assert (tuple0.getString("node").equals("product4"));
    assert (tuple0.getLong("docFreq") == 2);
    assert (tuple0.getLong("count(*)") == 2);
    Tuple tuple1 = tuples.get(2);
    assert (tuple1.getString("node").equals("product1"));
    assert (tuple1.getLong("docFreq") == 4);
    assert (tuple1.getLong("count(*)") == 3);
    Tuple tuple2 = tuples.get(3);
    assert (tuple2.getString("node").equals("product5"));
    assert (tuple2.getLong("docFreq") == 1);
    assert (tuple2.getLong("count(*)") == 1);
    cache.close();
}
Also used :
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)
StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext)
MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric)
SortStream (org.apache.solr.client.solrj.io.stream.SortStream)
TupleStream (org.apache.solr.client.solrj.io.stream.TupleStream)
MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric)
StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)
CloudSolrStream (org.apache.solr.client.solrj.io.stream.CloudSolrStream)
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)
Tuple (org.apache.solr.client.solrj.io.Tuple)
Test (org.junit.Test)
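
All of these examples follow the same SolrClientCache lifecycle: create one cache, hand it to each StreamContext so the streams reuse pooled SolrClients, and close it once at the end. Below is a minimal sketch of that pattern; zkHost and getTuples are placeholders standing in for the ZooKeeper address and the tests' read-to-EOF helper, not part of the SolrJ API.

import java.util.List;
import org.apache.solr.client.solrj.io.SolrClientCache;
import org.apache.solr.client.solrj.io.Tuple;
import org.apache.solr.client.solrj.io.stream.CloudSolrStream;
import org.apache.solr.client.solrj.io.stream.StreamContext;
import org.apache.solr.client.solrj.io.stream.TupleStream;
import org.apache.solr.client.solrj.io.stream.expr.StreamFactory;

SolrClientCache cache = new SolrClientCache();
try {
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", zkHost) // zkHost: assumed ZooKeeper address
        .withFunctionName("search", CloudSolrStream.class);
    TupleStream stream = factory.constructStream(
        "search(collection1, q=\"*:*\", fl=\"id\", sort=\"id asc\")");
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache); // streams sharing this cache reuse pooled clients
    stream.setStreamContext(context);
    List<Tuple> tuples = getTuples(stream); // assumed helper: open, read to EOF, close
} finally {
    cache.close(); // closes every SolrClient the cache handed out
}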

Example 57 with SolrClientCache

Use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.

From class GraphExpressionTest, method testScoreNodesStream.

@Test
public void testScoreNodesStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "basket_s", "basket1", "product_s", "product1", "price_f", "1")
        .add(id, "1", "basket_s", "basket1", "product_s", "product3", "price_f", "1")
        .add(id, "2", "basket_s", "basket1", "product_s", "product5", "price_f", "100")
        .add(id, "3", "basket_s", "basket2", "product_s", "product1", "price_f", "1")
        .add(id, "4", "basket_s", "basket2", "product_s", "product6", "price_f", "1")
        .add(id, "5", "basket_s", "basket2", "product_s", "product7", "price_f", "1")
        .add(id, "6", "basket_s", "basket3", "product_s", "product4", "price_f", "1")
        .add(id, "7", "basket_s", "basket3", "product_s", "product3", "price_f", "1")
        .add(id, "8", "basket_s", "basket3", "product_s", "product1", "price_f", "1")
        .add(id, "9", "basket_s", "basket4", "product_s", "product4", "price_f", "1")
        .add(id, "10", "basket_s", "basket4", "product_s", "product3", "price_f", "1")
        .add(id, "11", "basket_s", "basket4", "product_s", "product1", "price_f", "1")
        .add(id, "12", "basket_s", "basket5", "product_s", "product1", "price_f", "1")
        .add(id, "13", "basket_s", "basket6", "product_s", "product1", "price_f", "1")
        .add(id, "14", "basket_s", "basket7", "product_s", "product1", "price_f", "1")
        .add(id, "15", "basket_s", "basket4", "product_s", "product1", "price_f", "1")
        .commit(cluster.getSolrClient(), COLLECTION);
    List<Tuple> tuples = null;
    TupleStream stream = null;
    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    context.setSolrClientCache(cache);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withDefaultZkHost(cluster.getZkServer().getZkAddress())
        .withFunctionName("gatherNodes", GatherNodesStream.class)
        .withFunctionName("scoreNodes", ScoreNodesStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("sort", SortStream.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class);
    String expr = "gatherNodes(collection1, " + "walk=\"product3->product_s\"," + "gather=\"basket_s\")";
    String expr2 = "sort(by=\"nodeScore desc\", " +
        "scoreNodes(gatherNodes(collection1, " + expr + "," +
        "walk=\"node->basket_s\"," +
        "gather=\"product_s\", " +
        "count(*), " +
        "avg(price_f), " +
        "sum(price_f), " +
        "min(price_f), " +
        "max(price_f))))";
    stream = factory.constructStream(expr2);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    Tuple tuple0 = tuples.get(0);
    assert (tuple0.getString("node").equals("product4"));
    assert (tuple0.getLong("docFreq") == 2);
    assert (tuple0.getLong("count(*)") == 2);
    Tuple tuple1 = tuples.get(1);
    assert (tuple1.getString("node").equals("product1"));
    assert (tuple1.getLong("docFreq") == 8);
    assert (tuple1.getLong("count(*)") == 3);
    Tuple tuple2 = tuples.get(2);
    assert (tuple2.getString("node").equals("product5"));
    assert (tuple2.getLong("docFreq") == 1);
    assert (tuple2.getLong("count(*)") == 1);
    //Test using a different termFreq field than the default count(*)
    expr2 = "sort(by=\"nodeScore desc\", " +
        "scoreNodes(termFreq=\"avg(price_f)\",gatherNodes(collection1, " + expr + "," +
        "walk=\"node->basket_s\"," +
        "gather=\"product_s\", " +
        "count(*), " +
        "avg(price_f), " +
        "sum(price_f), " +
        "min(price_f), " +
        "max(price_f))))";
    stream = factory.constructStream(expr2);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    tuple0 = tuples.get(0);
    assert (tuple0.getString("node").equals("product5"));
    assert (tuple0.getLong("docFreq") == 1);
    assert (tuple0.getDouble("avg(price_f)") == 100);
    tuple1 = tuples.get(1);
    assert (tuple1.getString("node").equals("product4"));
    assert (tuple1.getLong("docFreq") == 2);
    assert (tuple1.getDouble("avg(price_f)") == 1);
    tuple2 = tuples.get(2);
    assert (tuple2.getString("node").equals("product1"));
    assert (tuple2.getLong("docFreq") == 8);
    assert (tuple2.getDouble("avg(price_f)") == 1);
    cache.close();
}
Also used :
SortStream (org.apache.solr.client.solrj.io.stream.SortStream)
ScoreNodesStream (org.apache.solr.client.solrj.io.stream.ScoreNodesStream)
TupleStream (org.apache.solr.client.solrj.io.stream.TupleStream)
MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric)
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)
StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext)
StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)
MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric)
Tuple (org.apache.solr.client.solrj.io.Tuple)
Test (org.junit.Test)
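
What scoreNodes adds to each node tuple is a nodeScore, a tf-idf style score that (as the assertions above suggest) combines a term-frequency metric, count(*) by default or the field named by termFreq, with the node's docFreq across the collection. A small sketch that checks the resulting ordering, assuming tuples was read from one of the sort(by="nodeScore desc", scoreNodes(...)) streams above:

double previous = Double.MAX_VALUE;
for (Tuple t : tuples) {
    double nodeScore = t.getDouble("nodeScore"); // field added by scoreNodes
    assert nodeScore <= previous : "expected non-increasing nodeScore";
    previous = nodeScore;
}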

Example 58 with SolrClientCache

Use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.

From class StreamExpressionTest, method testTopicStream.

@Test
public void testTopicStream() throws Exception {
    Assume.assumeTrue(!useAlias);
    new UpdateRequest()
        .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("topic", TopicStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("daemon", DaemonStream.class);
    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    SolrClientCache cache = new SolrClientCache();
    try {
        //Store checkpoints in the same index as the main documents. This is perfectly valid
        expression = StreamExpressionParser.parse("topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)");
        stream = factory.constructStream(expression);
        StreamContext context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        tuples = getTuples(stream);
        //Should be zero because the checkpoints will be set to the highest version on the shards.
        assertEquals(tuples.size(), 0);
        cluster.getSolrClient().commit("collection1");
        //Now check to see if the checkpoints are present
        expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        tuples = getTuples(stream);
        assertEquals(tuples.size(), 1);
        List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
        assertEquals(checkpoints.size(), 2);
        Long version1 = tuples.get(0).getLong("_version_");
        //Index a few more documents
        new UpdateRequest()
            .add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9")
            .add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10")
            .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        expression = StreamExpressionParser.parse("topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2)");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        try {
            stream.open();
            Tuple tuple1 = stream.read();
            assertEquals((long) tuple1.getLong("id"), 10L);
            cluster.getSolrClient().commit("collection1");
            // Checkpoint should not have changed.
            expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
            TupleStream cstream = factory.constructStream(expression);
            context = new StreamContext();
            context.setSolrClientCache(cache);
            cstream.setStreamContext(context);
            tuples = getTuples(cstream);
            assertEquals(tuples.size(), 1);
            checkpoints = tuples.get(0).getStrings("checkpoint_ss");
            assertEquals(checkpoints.size(), 2);
            Long version2 = tuples.get(0).getLong("_version_");
            assertEquals(version1, version2);
            Tuple tuple2 = stream.read();
            cluster.getSolrClient().commit("collection1");
            assertEquals((long) tuple2.getLong("id"), 11L);
            //Checkpoint should have changed.
            expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
            cstream = factory.constructStream(expression);
            context = new StreamContext();
            context.setSolrClientCache(cache);
            cstream.setStreamContext(context);
            tuples = getTuples(cstream);
            assertEquals(tuples.size(), 1);
            checkpoints = tuples.get(0).getStrings("checkpoint_ss");
            assertEquals(checkpoints.size(), 2);
            Long version3 = tuples.get(0).getLong("_version_");
            assertTrue(version3 > version2);
            Tuple tuple3 = stream.read();
            assertTrue(tuple3.EOF);
        } finally {
            stream.close();
        }
        //Test with the DaemonStream
        DaemonStream dstream = null;
        try {
            expression = StreamExpressionParser.parse("daemon(topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
            dstream = (DaemonStream) factory.constructStream(expression);
            context = new StreamContext();
            context.setSolrClientCache(cache);
            dstream.setStreamContext(context);
            //Index a few more documents
            new UpdateRequest()
                .add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9")
                .add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10")
                .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
            //Start reading from the DaemonStream
            Tuple tuple = null;
            dstream.open();
            tuple = dstream.read();
            assertEquals(12, (long) tuple.getLong(id));
            tuple = dstream.read();
            assertEquals(13, (long) tuple.getLong(id));
            // We want to see if the version has been updated after reading two tuples
            cluster.getSolrClient().commit("collection1");
            //Index a few more documents
            new UpdateRequest()
                .add(id, "14", "a_s", "hello", "a_i", "13", "a_f", "9")
                .add(id, "15", "a_s", "hello", "a_i", "14", "a_f", "10")
                .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
            //Read from the same DaemonStream stream
            tuple = dstream.read();
            assertEquals(14, (long) tuple.getLong(id));
            // This should trigger a checkpoint as it's the 4th read from the stream.
            tuple = dstream.read();
            assertEquals(15, (long) tuple.getLong(id));
            dstream.shutdown();
            tuple = dstream.read();
            assertTrue(tuple.EOF);
        } finally {
            dstream.close();
        }
    } finally {
        cache.close();
    }
}
Also used :
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression)
StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)
Tuple (org.apache.solr.client.solrj.io.Tuple)
Test (org.junit.Test)
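
The topic() pattern the test exercises is a checkpointed pull: each run returns only documents that arrived since the last run, with progress persisted under the given id in the checkpoint collection. A minimal consumption loop, sketched against the same factory and cache as the test above:

expression = StreamExpressionParser.parse(
    "topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=2)");
TupleStream topic = factory.constructStream(expression);
StreamContext ctx = new StreamContext();
ctx.setSolrClientCache(cache);
topic.setStreamContext(ctx);
try {
    topic.open();
    for (Tuple t = topic.read(); !t.EOF; t = topic.read()) {
        // process only documents added since the previous run of this topic id
    }
} finally {
    topic.close(); // progress is checkpointed as the stream is read (see checkpointEvery)
}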

Example 59 with SolrClientCache

Use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.

From class StreamExpressionTest, method testDaemonStream.

@Test
public void testDaemonStream() throws Exception {
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("daemon", DaemonStream.class);
    StreamExpression expression;
    DaemonStream daemonStream;
    expression = StreamExpressionParser.parse("daemon(rollup(" + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"a_i,a_s\", sort=\"a_s asc\")," + "over=\"a_s\"," + "sum(a_i)" + "), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
    daemonStream = (DaemonStream) factory.constructStream(expression);
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    daemonStream.setStreamContext(streamContext);
    try {
        //Test Long and Double Sums
        // This will start the daemon thread
        daemonStream.open();
        for (int i = 0; i < 4; i++) {
            // Reads from the queue
            Tuple tuple = daemonStream.read();
            String bucket = tuple.getString("a_s");
            Double sumi = tuple.getDouble("sum(a_i)");
            //System.out.println("#################################### Bucket 1:"+bucket);
            assertTrue(bucket.equals("hello0"));
            assertTrue(sumi.doubleValue() == 17.0D);
            tuple = daemonStream.read();
            bucket = tuple.getString("a_s");
            sumi = tuple.getDouble("sum(a_i)");
            //System.out.println("#################################### Bucket 2:"+bucket);
            assertTrue(bucket.equals("hello3"));
            assertTrue(sumi.doubleValue() == 38.0D);
            tuple = daemonStream.read();
            bucket = tuple.getString("a_s");
            sumi = tuple.getDouble("sum(a_i)");
            //System.out.println("#################################### Bucket 3:"+bucket);
            assertTrue(bucket.equals("hello4"));
            assertTrue(sumi.longValue() == 15);
        }
        while (daemonStream.remainingCapacity() > 0) {
            try {
                Thread.sleep(1000);
            } catch (Exception e) {
                // Ignore interruptions while waiting for the daemon to fill its queue.
            }
        }
        //OK capacity is full, let's index a new doc
        new UpdateRequest().add(id, "10", "a_s", "hello0", "a_i", "1", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        //Drain the queued runs so that subsequent reads have the tuples with the updated count.
        for (int i = 0; i < 12; i++) {
            daemonStream.read();
        }
        //And rerun the loop. It should have a new count for hello0
        for (int i = 0; i < 4; i++) {
            // Reads from the queue
            Tuple tuple = daemonStream.read();
            String bucket = tuple.getString("a_s");
            Double sumi = tuple.getDouble("sum(a_i)");
            //System.out.println("#################################### Bucket 1:"+bucket);
            assertTrue(bucket.equals("hello0"));
            assertTrue(sumi.doubleValue() == 18.0D);
            tuple = daemonStream.read();
            bucket = tuple.getString("a_s");
            sumi = tuple.getDouble("sum(a_i)");
            //System.out.println("#################################### Bucket 2:"+bucket);
            assertTrue(bucket.equals("hello3"));
            assertTrue(sumi.doubleValue() == 38.0D);
            tuple = daemonStream.read();
            bucket = tuple.getString("a_s");
            sumi = tuple.getDouble("sum(a_i)");
            //System.out.println("#################################### Bucket 3:"+bucket);
            assertTrue(bucket.equals("hello4"));
            assertTrue(sumi.longValue() == 15);
        }
    } finally {
        //This should stop the daemon thread
        daemonStream.close();
        solrClientCache.close();
    }
}
Also used :
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)
CountMetric (org.apache.solr.client.solrj.io.stream.metrics.CountMetric)
MaxMetric (org.apache.solr.client.solrj.io.stream.metrics.MaxMetric)
IOException (java.io.IOException)
SumMetric (org.apache.solr.client.solrj.io.stream.metrics.SumMetric)
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression)
StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)
Tuple (org.apache.solr.client.solrj.io.Tuple)
Test (org.junit.Test)
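
A DaemonStream wraps an inner stream in a background thread that reruns it every runInterval milliseconds and pushes results into a bounded queue of queueSize tuples. A condensed sketch of the lifecycle this test exercises:

daemonStream.open();             // starts the daemon thread
Tuple t = daemonStream.read();   // blocks until the daemon queues a tuple
daemonStream.shutdown();         // stops the run loop; already-queued tuples remain readable
while (!t.EOF) {
    t = daemonStream.read();     // drain the queue; the final tuple is EOF
}
daemonStream.close();            // closes the daemon and its inner stream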

Example 60 with SolrClientCache

Use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.

From class StreamExpressionTest, method testParallelTopicStream.

@Test
public void testParallelTopicStream() throws Exception {
    Assume.assumeTrue(!useAlias);
    new UpdateRequest()
        .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1", "subject", "ha ha bla blah0")
        .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2", "subject", "ha ha bla blah2")
        .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3", "subject", "ha ha bla blah3")
        .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4", "subject", "ha ha bla blah4")
        .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5", "subject", "ha ha bla blah5")
        .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6", "subject", "ha ha bla blah6")
        .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7", "subject", "ha ha bla blah7")
        .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8", "subject", "ha ha bla blah8")
        .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9", "subject", "ha ha bla blah9")
        .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10", "subject", "ha ha bla blah10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("topic", TopicStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("parallel", ParallelStream.class)
        .withFunctionName("daemon", DaemonStream.class);
    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    SolrClientCache cache = new SolrClientCache();
    try {
        //Store checkpoints in the same index as the main documents. This is perfectly valid
        expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))");
        stream = factory.constructStream(expression);
        StreamContext context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        tuples = getTuples(stream);
        //Should be zero because the checkpoints will be set to the highest version on the shards.
        assertEquals(tuples.size(), 0);
        cluster.getSolrClient().commit("collection1");
        //Now check to see if the checkpoints are present
        expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000*\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        tuples = getTuples(stream);
        assertEquals(tuples.size(), 2);
        List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
        assertEquals(checkpoints.size(), 2);
        String id1 = tuples.get(0).getString("id");
        String id2 = tuples.get(1).getString("id");
        assertTrue(id1.equals("1000000_0"));
        assertTrue(id2.equals("1000000_1"));
        //Index a few more documents
        new UpdateRequest()
            .add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9")
            .add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10")
            .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        assertTopicRun(stream, "10", "11");
        //Test with an initial checkpoint. This should pull all documents.
        expression = StreamExpressionParser.parse("parallel(collection1, " +
            "workers=\"2\", " +
            "sort=\"_version_ asc\"," +
            "topic(collection1, " +
            "collection1, " +
            "q=\"a_s:hello\", " +
            "fl=\"id\", " +
            "id=\"2000000\", " +
            "initialCheckpoint=\"0\", " +
            "partitionKeys=\"id\"))");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        assertTopicRun(stream, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11");
        //Index a few more documents
        new UpdateRequest()
            .add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9")
            .add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10")
            .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        //Run the same topic again including the initialCheckpoint. It should start where it left off.
        //initialCheckpoint should be ignored for all but the first run.
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        assertTopicRun(stream, "12", "13");
        //Test text extraction
        expression = StreamExpressionParser.parse("parallel(collection1, " +
            "workers=\"2\", " +
            "sort=\"_version_ asc\"," +
            "topic(collection1, " +
            "collection1, " +
            "q=\"subject:bla\", " +
            "fl=\"subject\", " +
            "id=\"3000000\", " +
            "initialCheckpoint=\"0\", " +
            "partitionKeys=\"id\"))");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        assertTopicSubject(stream, "ha ha bla blah0", "ha ha bla blah1", "ha ha bla blah2", "ha ha bla blah3", "ha ha bla blah4", "ha ha bla blah5", "ha ha bla blah6", "ha ha bla blah7", "ha ha bla blah8", "ha ha bla blah9", "ha ha bla blah10");
    } finally {
        cache.close();
    }
}
Also used :
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression)
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)
StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)
Tuple (org.apache.solr.client.solrj.io.Tuple)
Test (org.junit.Test)
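
With parallel(), the topic is split across workers on partitionKeys and each worker persists its own checkpoint document, which is why the search above finds ids 1000000_0 and 1000000_1. A short sketch of inspecting those per-worker checkpoints, reusing the factory, cache, and getTuples helper from the test:

expression = StreamExpressionParser.parse(
    "search(collection1, q=\"id:1000000*\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
TupleStream checkpoints = factory.constructStream(expression);
StreamContext ctx = new StreamContext();
ctx.setSolrClientCache(cache);
checkpoints.setStreamContext(ctx);
List<Tuple> docs = getTuples(checkpoints); // expect one checkpoint doc per worker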

Aggregations

SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache): 96
Tuple (org.apache.solr.client.solrj.io.Tuple): 92
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest): 88
Test (org.junit.Test): 88
StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory): 61
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 39
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression): 36
SolrParams (org.apache.solr.common.params.SolrParams): 27
FieldComparator (org.apache.solr.client.solrj.io.comp.FieldComparator): 20
MultipleFieldComparator (org.apache.solr.client.solrj.io.comp.MultipleFieldComparator): 14
MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric): 13
MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric): 12
CountMetric (org.apache.solr.client.solrj.io.stream.metrics.CountMetric): 7
MaxMetric (org.apache.solr.client.solrj.io.stream.metrics.MaxMetric): 7
IOException (java.io.IOException): 6
HashMap (java.util.HashMap): 6
JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner): 6
FieldEqualitor (org.apache.solr.client.solrj.io.eq.FieldEqualitor): 6
StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext): 6
SumMetric (org.apache.solr.client.solrj.io.stream.metrics.SumMetric): 6