
Example 36 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

The class TopicStream, method toExplanation:

@Override
public Explanation toExplanation(StreamFactory factory) throws IOException {
    StreamExplanation explanation = new StreamExplanation(getStreamNodeId().toString());
    explanation.setFunctionName(factory.getFunctionName(this.getClass()));
    explanation.setImplementingClass(this.getClass().getName());
    explanation.setExpressionType(ExpressionType.STREAM_SOURCE);
    explanation.setExpression(toExpression(factory).toString());
    {
        // child 1 is a datastore so add it at this point
        StreamExplanation child = new StreamExplanation(getStreamNodeId() + "-datastore");
        child.setFunctionName(String.format(Locale.ROOT, "solr (%s)", collection));
        // TODO: fix this so we know the # of workers - check with Joel about a Topic's ability to be in a
        // parallel stream.
        child.setImplementingClass("Solr/Lucene");
        child.setExpressionType(ExpressionType.DATASTORE);
        ModifiableSolrParams mParams = new ModifiableSolrParams(params);
        child.setExpression(mParams.getMap().entrySet().stream().map(e -> String.format(Locale.ROOT, "%s=%s", e.getKey(), e.getValue())).collect(Collectors.joining(",")));
        explanation.addChild(child);
    }
    {
        // child 2 is a place where we store and read checkpoint info from
        StreamExplanation child = new StreamExplanation(getStreamNodeId() + "-checkpoint");
        child.setFunctionName(String.format(Locale.ROOT, "solr (checkpoint store)"));
        child.setImplementingClass("Solr/Lucene");
        child.setExpressionType(ExpressionType.DATASTORE);
        child.setExpression(String.format(Locale.ROOT, "id=%s, collection=%s, checkpointEvery=%d", id, checkpointCollection, checkpointEvery));
        explanation.addChild(child);
    }
    return explanation;
}
Also used : StreamExplanation(org.apache.solr.client.solrj.io.stream.expr.StreamExplanation) VERSION_FIELD(org.apache.solr.common.params.CommonParams.VERSION_FIELD) Tuple(org.apache.solr.client.solrj.io.Tuple) ClusterState(org.apache.solr.common.cloud.ClusterState) LoggerFactory(org.slf4j.LoggerFactory) HashMap(java.util.HashMap) Random(java.util.Random) StreamExpression(org.apache.solr.client.solrj.io.stream.expr.StreamExpression) TreeSet(java.util.TreeSet) SolrParams(org.apache.solr.common.params.SolrParams) ArrayList(java.util.ArrayList) Future(java.util.concurrent.Future) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) Builder(org.apache.solr.client.solrj.impl.CloudSolrClient.Builder) ZkCoreNodeProps(org.apache.solr.common.cloud.ZkCoreNodeProps) Locale(java.util.Locale) Map(java.util.Map) Expressible(org.apache.solr.client.solrj.io.stream.expr.Expressible) FieldComparator(org.apache.solr.client.solrj.io.comp.FieldComparator) ExecutorUtil(org.apache.solr.common.util.ExecutorUtil) ExecutorService(java.util.concurrent.ExecutorService) StreamExpressionNamedParameter(org.apache.solr.client.solrj.io.stream.expr.StreamExpressionNamedParameter) SORT(org.apache.solr.common.params.CommonParams.SORT) ZkStateReader(org.apache.solr.common.cloud.ZkStateReader) Slice(org.apache.solr.common.cloud.Slice) Logger(org.slf4j.Logger) ComparatorOrder(org.apache.solr.client.solrj.io.comp.ComparatorOrder) ExpressionType(org.apache.solr.client.solrj.io.stream.expr.Explanation.ExpressionType) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) MethodHandles(java.lang.invoke.MethodHandles) Collection(java.util.Collection) Set(java.util.Set) IOException(java.io.IOException) Collectors(java.util.stream.Collectors) Explanation(org.apache.solr.client.solrj.io.stream.expr.Explanation) Replica(org.apache.solr.common.cloud.Replica) StreamExpressionValue(org.apache.solr.client.solrj.io.stream.expr.StreamExpressionValue) SolrDocument(org.apache.solr.common.SolrDocument) List(java.util.List) HttpSolrClient(org.apache.solr.client.solrj.impl.HttpSolrClient) DISTRIB(org.apache.solr.common.params.CommonParams.DISTRIB) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) ID(org.apache.solr.common.params.CommonParams.ID) Entry(java.util.Map.Entry) SolrjNamedThreadFactory(org.apache.solr.common.util.SolrjNamedThreadFactory) Collections(java.util.Collections) SolrInputDocument(org.apache.solr.common.SolrInputDocument)
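
A minimal usage sketch (not part of the project source; zkHost and the expression values are placeholders taken from Example 40 below) showing how this toExplanation method is typically reached: register the "topic" function on a StreamFactory, construct the stream from an expression, and ask it to explain itself.

// Sketch only: assumes a SolrCloud cluster with a "collection1" collection; zkHost is a placeholder.
String zkHost = "localhost:9983";
StreamFactory factory = new StreamFactory()
    .withCollectionZkHost("collection1", zkHost)
    .withFunctionName("topic", TopicStream.class);
TopicStream topic = (TopicStream) factory.constructStream(
    "topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)");
// The factory resolves "topic" via getFunctionName(TopicStream.class) inside toExplanation;
// the call may throw IOException.
Explanation explanation = topic.toExplanation(factory);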

Example 37 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

The class GraphExpressionTest, method testShortestPathStream:

@Test
public void testShortestPathStream() throws Exception {
    new UpdateRequest().add(id, "0", "from_s", "jim", "to_s", "mike", "predicate_s", "knows").add(id, "1", "from_s", "jim", "to_s", "dave", "predicate_s", "knows").add(id, "2", "from_s", "jim", "to_s", "stan", "predicate_s", "knows").add(id, "3", "from_s", "dave", "to_s", "stan", "predicate_s", "knows").add(id, "4", "from_s", "dave", "to_s", "bill", "predicate_s", "knows").add(id, "5", "from_s", "dave", "to_s", "mike", "predicate_s", "knows").add(id, "20", "from_s", "dave", "to_s", "alex", "predicate_s", "knows").add(id, "21", "from_s", "alex", "to_s", "steve", "predicate_s", "knows").add(id, "6", "from_s", "stan", "to_s", "alice", "predicate_s", "knows").add(id, "7", "from_s", "stan", "to_s", "mary", "predicate_s", "knows").add(id, "8", "from_s", "stan", "to_s", "dave", "predicate_s", "knows").add(id, "10", "from_s", "mary", "to_s", "mike", "predicate_s", "knows").add(id, "11", "from_s", "mary", "to_s", "max", "predicate_s", "knows").add(id, "12", "from_s", "mary", "to_s", "jim", "predicate_s", "knows").add(id, "13", "from_s", "mary", "to_s", "steve", "predicate_s", "knows").commit(cluster.getSolrClient(), COLLECTION);
    List<Tuple> tuples = null;
    Set<String> paths = null;
    ShortestPathStream stream = null;
    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    context.setSolrClientCache(cache);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("shortestPath", ShortestPathStream.class);
    Map params = new HashMap();
    params.put("fq", "predicate_s:knows");
    stream = (ShortestPathStream) factory.constructStream("shortestPath(collection1, " + "from=\"jim\", " + "to=\"steve\"," + "edge=\"from_s=to_s\"," + "fq=\"predicate_s:knows\"," + "threads=\"3\"," + "partitionSize=\"3\"," + "maxDepth=\"6\")");
    stream.setStreamContext(context);
    paths = new HashSet();
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 2);
    for (Tuple tuple : tuples) {
        paths.add(tuple.getStrings("path").toString());
    }
    assertTrue(paths.contains("[jim, dave, alex, steve]"));
    assertTrue(paths.contains("[jim, stan, mary, steve]"));
    //Test with batch size of 1
    params.put("fq", "predicate_s:knows");
    stream = (ShortestPathStream) factory.constructStream("shortestPath(collection1, " + "from=\"jim\", " + "to=\"steve\"," + "edge=\"from_s=to_s\"," + "fq=\"predicate_s:knows\"," + "threads=\"3\"," + "partitionSize=\"1\"," + "maxDepth=\"6\")");
    stream.setStreamContext(context);
    paths = new HashSet();
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 2);
    for (Tuple tuple : tuples) {
        paths.add(tuple.getStrings("path").toString());
    }
    assertTrue(paths.contains("[jim, dave, alex, steve]"));
    assertTrue(paths.contains("[jim, stan, mary, steve]"));
    //Test with bad predicate
    stream = (ShortestPathStream) factory.constructStream("shortestPath(collection1, " + "from=\"jim\", " + "to=\"steve\"," + "edge=\"from_s=to_s\"," + "fq=\"predicate_s:crap\"," + "threads=\"3\"," + "partitionSize=\"3\"," + "maxDepth=\"6\")");
    stream.setStreamContext(context);
    paths = new HashSet();
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 0);
    //Test with depth 2
    stream = (ShortestPathStream) factory.constructStream("shortestPath(collection1, " + "from=\"jim\", " + "to=\"steve\"," + "edge=\"from_s=to_s\"," + "fq=\"predicate_s:knows\"," + "threads=\"3\"," + "partitionSize=\"3\"," + "maxDepth=\"2\")");
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 0);
    //Take out alex
    params.put("fq", "predicate_s:knows NOT to_s:alex");
    stream = (ShortestPathStream) factory.constructStream("shortestPath(collection1, " + "from=\"jim\", " + "to=\"steve\"," + "edge=\"from_s=to_s\"," + "fq=\" predicate_s:knows NOT to_s:alex\"," + "threads=\"3\"," + "partitionSize=\"3\"," + "maxDepth=\"6\")");
    stream.setStreamContext(context);
    paths = new HashSet();
    tuples = getTuples(stream);
    assertTrue(tuples.size() == 1);
    for (Tuple tuple : tuples) {
        paths.add(tuple.getStrings("path").toString());
    }
    assertTrue(paths.contains("[jim, stan, mary, steve]"));
    cache.close();
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) HashMap(java.util.HashMap) StreamContext(org.apache.solr.client.solrj.io.stream.StreamContext) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Map(java.util.Map) Tuple(org.apache.solr.client.solrj.io.Tuple) HashSet(java.util.HashSet) Test(org.junit.Test)
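
The test relies on a getTuples(...) helper that is not shown on this page. A plausible minimal version (an assumption, not the project's exact helper) simply drains the stream until the EOF tuple:

// Hypothetical helper: open the stream, collect tuples until EOF, always close.
private List<Tuple> getTuples(TupleStream stream) throws IOException {
    List<Tuple> tuples = new ArrayList<>();
    try {
        stream.open();
        for (Tuple t = stream.read(); !t.EOF; t = stream.read()) {
            tuples.add(t);
        }
    } finally {
        stream.close();
    }
    return tuples;
}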

Example 38 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

The class GraphExpressionTest, method testScoreNodesFacetStream:

@Test
public void testScoreNodesFacetStream() throws Exception {
    new UpdateRequest().add(id, "0", "basket_s", "basket1", "product_ss", "product1", "product_ss", "product3", "product_ss", "product5", "price_f", "1").add(id, "3", "basket_s", "basket2", "product_ss", "product1", "product_ss", "product6", "product_ss", "product7", "price_f", "1").add(id, "6", "basket_s", "basket3", "product_ss", "product4", "product_ss", "product3", "product_ss", "product1", "price_f", "1").add(id, "9", "basket_s", "basket4", "product_ss", "product4", "product_ss", "product3", "product_ss", "product1", "price_f", "1").commit(cluster.getSolrClient(), COLLECTION);
    List<Tuple> tuples = null;
    TupleStream stream = null;
    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    context.setSolrClientCache(cache);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withDefaultZkHost(cluster.getZkServer().getZkAddress())
        .withFunctionName("gatherNodes", GatherNodesStream.class)
        .withFunctionName("scoreNodes", ScoreNodesStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("facet", FacetStream.class)
        .withFunctionName("sort", SortStream.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class);
    String expr = "sort(by=\"nodeScore desc\",scoreNodes(facet(collection1, q=\"product_ss:product3\", buckets=\"product_ss\", bucketSorts=\"count(*) desc\", bucketSizeLimit=100, count(*))))";
    stream = factory.constructStream(expr);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    Tuple tuple = tuples.get(0);
    assert (tuple.getString("node").equals("product3"));
    assert (tuple.getLong("docFreq") == 3);
    assert (tuple.getLong("count(*)") == 3);
    Tuple tuple0 = tuples.get(1);
    assert (tuple0.getString("node").equals("product4"));
    assert (tuple0.getLong("docFreq") == 2);
    assert (tuple0.getLong("count(*)") == 2);
    Tuple tuple1 = tuples.get(2);
    assert (tuple1.getString("node").equals("product1"));
    assert (tuple1.getLong("docFreq") == 4);
    assert (tuple1.getLong("count(*)") == 3);
    Tuple tuple2 = tuples.get(3);
    assert (tuple2.getString("node").equals("product5"));
    assert (tuple2.getLong("docFreq") == 1);
    assert (tuple2.getLong("count(*)") == 1);
    cache.close();
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamContext(org.apache.solr.client.solrj.io.stream.StreamContext) MinMetric(org.apache.solr.client.solrj.io.stream.metrics.MinMetric) SortStream(org.apache.solr.client.solrj.io.stream.SortStream) TupleStream(org.apache.solr.client.solrj.io.stream.TupleStream) MeanMetric(org.apache.solr.client.solrj.io.stream.metrics.MeanMetric) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) CloudSolrStream(org.apache.solr.client.solrj.io.stream.CloudSolrStream) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)
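
Because the outer sort(by="nodeScore desc", ...) orders the scored nodes, the ordering itself could also be checked. A small sketch (not in the original test; the "nodeScore" field name is taken from the sort criterion above):

// Sketch: assert the tuples come back in descending nodeScore order.
double previous = Double.MAX_VALUE;
for (Tuple t : tuples) {
    double score = t.getDouble("nodeScore");
    assertTrue(score <= previous);
    previous = score;
}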

Example 39 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

The class GraphExpressionTest, method testScoreNodesStream:

@Test
public void testScoreNodesStream() throws Exception {
    new UpdateRequest().add(id, "0", "basket_s", "basket1", "product_s", "product1", "price_f", "1").add(id, "1", "basket_s", "basket1", "product_s", "product3", "price_f", "1").add(id, "2", "basket_s", "basket1", "product_s", "product5", "price_f", "100").add(id, "3", "basket_s", "basket2", "product_s", "product1", "price_f", "1").add(id, "4", "basket_s", "basket2", "product_s", "product6", "price_f", "1").add(id, "5", "basket_s", "basket2", "product_s", "product7", "price_f", "1").add(id, "6", "basket_s", "basket3", "product_s", "product4", "price_f", "1").add(id, "7", "basket_s", "basket3", "product_s", "product3", "price_f", "1").add(id, "8", "basket_s", "basket3", "product_s", "product1", "price_f", "1").add(id, "9", "basket_s", "basket4", "product_s", "product4", "price_f", "1").add(id, "10", "basket_s", "basket4", "product_s", "product3", "price_f", "1").add(id, "11", "basket_s", "basket4", "product_s", "product1", "price_f", "1").add(id, "12", "basket_s", "basket5", "product_s", "product1", "price_f", "1").add(id, "13", "basket_s", "basket6", "product_s", "product1", "price_f", "1").add(id, "14", "basket_s", "basket7", "product_s", "product1", "price_f", "1").add(id, "15", "basket_s", "basket4", "product_s", "product1", "price_f", "1").commit(cluster.getSolrClient(), COLLECTION);
    List<Tuple> tuples = null;
    TupleStream stream = null;
    StreamContext context = new StreamContext();
    SolrClientCache cache = new SolrClientCache();
    context.setSolrClientCache(cache);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withDefaultZkHost(cluster.getZkServer().getZkAddress())
        .withFunctionName("gatherNodes", GatherNodesStream.class)
        .withFunctionName("scoreNodes", ScoreNodesStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("sort", SortStream.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class);
    String expr = "gatherNodes(collection1, " + "walk=\"product3->product_s\"," + "gather=\"basket_s\")";
    String expr2 = "sort(by=\"nodeScore desc\", " + "scoreNodes(gatherNodes(collection1, " + expr + "," + "walk=\"node->basket_s\"," + "gather=\"product_s\", " + "count(*), " + "avg(price_f), " + "sum(price_f), " + "min(price_f), " + "max(price_f))))";
    stream = factory.constructStream(expr2);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    Tuple tuple0 = tuples.get(0);
    assert (tuple0.getString("node").equals("product4"));
    assert (tuple0.getLong("docFreq") == 2);
    assert (tuple0.getLong("count(*)") == 2);
    Tuple tuple1 = tuples.get(1);
    assert (tuple1.getString("node").equals("product1"));
    assert (tuple1.getLong("docFreq") == 8);
    assert (tuple1.getLong("count(*)") == 3);
    Tuple tuple2 = tuples.get(2);
    assert (tuple2.getString("node").equals("product5"));
    assert (tuple2.getLong("docFreq") == 1);
    assert (tuple2.getLong("count(*)") == 1);
    //Test using a different termFreq field than the default count(*)
    expr2 = "sort(by=\"nodeScore desc\", " + "scoreNodes(termFreq=\"avg(price_f)\",gatherNodes(collection1, " + expr + "," + "walk=\"node->basket_s\"," + "gather=\"product_s\", " + "count(*), " + "avg(price_f), " + "sum(price_f), " + "min(price_f), " + "max(price_f))))";
    stream = factory.constructStream(expr2);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    tuple0 = tuples.get(0);
    assert (tuple0.getString("node").equals("product5"));
    assert (tuple0.getLong("docFreq") == 1);
    assert (tuple0.getDouble("avg(price_f)") == 100);
    tuple1 = tuples.get(1);
    assert (tuple1.getString("node").equals("product4"));
    assert (tuple1.getLong("docFreq") == 2);
    assert (tuple1.getDouble("avg(price_f)") == 1);
    tuple2 = tuples.get(2);
    assert (tuple2.getString("node").equals("product1"));
    assert (tuple2.getLong("docFreq") == 8);
    assert (tuple2.getDouble("avg(price_f)") == 1);
    cache.close();
}
Also used : SortStream(org.apache.solr.client.solrj.io.stream.SortStream) ScoreNodesStream(org.apache.solr.client.solrj.io.stream.ScoreNodesStream) TupleStream(org.apache.solr.client.solrj.io.stream.TupleStream) MeanMetric(org.apache.solr.client.solrj.io.stream.metrics.MeanMetric) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamContext(org.apache.solr.client.solrj.io.stream.StreamContext) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) MinMetric(org.apache.solr.client.solrj.io.stream.metrics.MinMetric) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)
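
For readability, this is what expr2 in the first half of the test expands to after string concatenation (indentation added here only for clarity; the second run differs only in that scoreNodes takes termFreq="avg(price_f)"):

sort(by="nodeScore desc",
  scoreNodes(
    gatherNodes(collection1,
      gatherNodes(collection1,
        walk="product3->product_s",
        gather="basket_s"),
      walk="node->basket_s",
      gather="product_s",
      count(*), avg(price_f), sum(price_f), min(price_f), max(price_f))))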

Example 40 with StreamFactory

Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

The class StreamExpressionTest, method testTopicStream:

@Test
public void testTopicStream() throws Exception {
    Assume.assumeTrue(!useAlias);
    new UpdateRequest().add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1").add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2").add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5").add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6").add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7").add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8").add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("topic", TopicStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("daemon", DaemonStream.class);
    StreamExpression expression;
    TupleStream stream;
    List<Tuple> tuples;
    SolrClientCache cache = new SolrClientCache();
    try {
        //Store checkpoints in the same index as the main documents. This is perfectly valid.
        expression = StreamExpressionParser.parse("topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)");
        stream = factory.constructStream(expression);
        StreamContext context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        tuples = getTuples(stream);
        //Should be zero because the checkpoints will be set to the highest version on the shards.
        assertEquals(tuples.size(), 0);
        cluster.getSolrClient().commit("collection1");
        //Now check to see if the checkpoints are present
        expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        tuples = getTuples(stream);
        assertEquals(tuples.size(), 1);
        List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
        assertEquals(checkpoints.size(), 2);
        Long version1 = tuples.get(0).getLong("_version_");
        //Index a few more documents
        new UpdateRequest().add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        expression = StreamExpressionParser.parse("topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2)");
        stream = factory.constructStream(expression);
        context = new StreamContext();
        context.setSolrClientCache(cache);
        stream.setStreamContext(context);
        try {
            stream.open();
            Tuple tuple1 = stream.read();
            assertEquals((long) tuple1.getLong("id"), 10L);
            cluster.getSolrClient().commit("collection1");
            // Checkpoint should not have changed.
            expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
            TupleStream cstream = factory.constructStream(expression);
            context = new StreamContext();
            context.setSolrClientCache(cache);
            cstream.setStreamContext(context);
            tuples = getTuples(cstream);
            assertEquals(tuples.size(), 1);
            checkpoints = tuples.get(0).getStrings("checkpoint_ss");
            assertEquals(checkpoints.size(), 2);
            Long version2 = tuples.get(0).getLong("_version_");
            assertEquals(version1, version2);
            Tuple tuple2 = stream.read();
            cluster.getSolrClient().commit("collection1");
            assertEquals((long) tuple2.getLong("id"), 11L);
            //Checkpoint should have changed.
            expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
            cstream = factory.constructStream(expression);
            context = new StreamContext();
            context.setSolrClientCache(cache);
            cstream.setStreamContext(context);
            tuples = getTuples(cstream);
            assertEquals(tuples.size(), 1);
            checkpoints = tuples.get(0).getStrings("checkpoint_ss");
            assertEquals(checkpoints.size(), 2);
            Long version3 = tuples.get(0).getLong("_version_");
            assertTrue(version3 > version2);
            Tuple tuple3 = stream.read();
            assertTrue(tuple3.EOF);
        } finally {
            stream.close();
        }
        //Test with the DaemonStream
        DaemonStream dstream = null;
        try {
            expression = StreamExpressionParser.parse("daemon(topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
            dstream = (DaemonStream) factory.constructStream(expression);
            context = new StreamContext();
            context.setSolrClientCache(cache);
            dstream.setStreamContext(context);
            //Index a few more documents
            new UpdateRequest().add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
            //Start reading from the DaemonStream
            Tuple tuple = null;
            dstream.open();
            tuple = dstream.read();
            assertEquals(12, (long) tuple.getLong(id));
            tuple = dstream.read();
            assertEquals(13, (long) tuple.getLong(id));
            // We want to see if the version has been updated after reading two tuples
            cluster.getSolrClient().commit("collection1");
            //Index a few more documents
            new UpdateRequest().add(id, "14", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "15", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
            //Read from the same DaemonStream stream
            tuple = dstream.read();
            assertEquals(14, (long) tuple.getLong(id));
            // This should trigger a checkpoint as it's the 4th read from the stream.
            tuple = dstream.read();
            assertEquals(15, (long) tuple.getLong(id));
            dstream.shutdown();
            tuple = dstream.read();
            assertTrue(tuple.EOF);
        } finally {
            dstream.close();
        }
    } finally {
        cache.close();
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamExpression(org.apache.solr.client.solrj.io.stream.expr.StreamExpression) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)
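
Note that this test parses each expression with StreamExpressionParser.parse(...) before handing it to the factory, whereas the GraphExpressionTest examples above pass the raw string straight to constructStream. Both construction paths appear on this page and produce an equivalent stream; a short sketch reusing the factory from this test:

// Parse first, then construct...
StreamExpression parsed = StreamExpressionParser.parse(
    "topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)");
TupleStream fromExpression = factory.constructStream(parsed);
// ...or let the factory parse the raw expression string itself.
TupleStream fromString = factory.constructStream(
    "topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)");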

Aggregations

StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory): 69
Tuple (org.apache.solr.client.solrj.io.Tuple): 65
UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest): 65
Test (org.junit.Test): 64
SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache): 61
StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression): 37
ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams): 14
MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric): 10
FieldComparator (org.apache.solr.client.solrj.io.comp.FieldComparator): 9
MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric): 9
IOException (java.io.IOException): 8
HashMap (java.util.HashMap): 7
StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext): 6
ArrayList (java.util.ArrayList): 5
Map (java.util.Map): 5
Connection (java.sql.Connection): 4
Statement (java.sql.Statement): 4
List (java.util.List): 4
JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner): 4
AndEvaluator (org.apache.solr.client.solrj.io.eval.AndEvaluator): 3