Search in sources :

Example 41 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testDaemonStream.

/**
 * Exercises a {@code daemon(rollup(search(...)))} expression: reads the per-bucket sums the
 * daemon queues up, indexes one more document, and checks that later reads reflect it.
 *
 * @throws Exception if indexing, stream construction or reading fails, or if the test thread is
 *     interrupted while waiting for the daemon queue to fill
 */
@Test
public void testDaemonStream() throws Exception {
    // Ten docs in three a_s buckets; a_i sums are hello0 = 0+2+1+14 = 17,
    // hello3 = 3+10+12+13 = 38 and hello4 = 4+11 = 15.
    new UpdateRequest()
        .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
        .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
        .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
        .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
        .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
        .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
        .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
        .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
        .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
        .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("sum", SumMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class)
        .withFunctionName("daemon", DaemonStream.class);
    StreamExpression expression = StreamExpressionParser.parse("daemon(rollup(" + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"a_i,a_s\", sort=\"a_s asc\")," + "over=\"a_s\"," + "sum(a_i)" + "), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
    DaemonStream daemonStream = (DaemonStream) factory.constructStream(expression);
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    daemonStream.setStreamContext(streamContext);
    try {
        // This will start the daemon thread
        daemonStream.open();
        // Each pass consumes one tuple per bucket, in a_s order, from the daemon's queue.
        for (int run = 0; run < 4; run++) {
            assertNextDaemonRollup(daemonStream, "hello0", 17.0D);
            assertNextDaemonRollup(daemonStream, "hello3", 38.0D);
            assertNextDaemonRollup(daemonStream, "hello4", 15.0D);
        }
        // Wait until the daemon has refilled its queue. An interrupt now propagates and fails
        // the test instead of being swallowed, which could otherwise spin here forever.
        while (daemonStream.remainingCapacity() > 0) {
            Thread.sleep(1000);
        }
        //OK capacity is full, let's index a new doc
        new UpdateRequest().add(id, "10", "a_s", "hello0", "a_i", "1", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        // Discard 12 tuples so the following reads come from runs made after the commit.
        // NOTE(review): 12 presumably covers the 9 queued tuples plus one more run of 3 that
        // may already have been in flight - confirm against DaemonStream's queueing behaviour.
        for (int i = 0; i < 12; i++) {
            daemonStream.read();
        }
        //And rerun the loop. It should have a new sum for hello0 (17 + 1)
        for (int run = 0; run < 4; run++) {
            assertNextDaemonRollup(daemonStream, "hello0", 18.0D);
            assertNextDaemonRollup(daemonStream, "hello3", 38.0D);
            assertNextDaemonRollup(daemonStream, "hello4", 15.0D);
        }
    } finally {
        try {
            //This should stop the daemon thread
            daemonStream.close();
        } finally {
            // Release the client cache even when closing the daemon stream fails.
            solrClientCache.close();
        }
    }
}

/**
 * Reads the next tuple from the daemon stream and checks its {@code a_s} bucket and
 * {@code sum(a_i)} value, reporting the values actually read on failure.
 */
private void assertNextDaemonRollup(DaemonStream daemonStream, String expectedBucket, double expectedSum) throws Exception {
    Tuple tuple = daemonStream.read();
    String bucket = tuple.getString("a_s");
    Double sum = tuple.getDouble("sum(a_i)");
    assertTrue("expected bucket " + expectedBucket + " but read " + bucket, expectedBucket.equals(bucket));
    assertTrue("expected sum(a_i)=" + expectedSum + " for bucket " + expectedBucket + " but read " + sum, sum != null && sum.doubleValue() == expectedSum);
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) CountMetric(org.apache.solr.client.solrj.io.stream.metrics.CountMetric) MaxMetric(org.apache.solr.client.solrj.io.stream.metrics.MaxMetric) IOException(java.io.IOException) SumMetric(org.apache.solr.client.solrj.io.stream.metrics.SumMetric) StreamExpression(org.apache.solr.client.solrj.io.stream.expr.StreamExpression) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 42 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testParallelTopicStream.

/**
 * Runs {@code topic} streams wrapped in {@code parallel} (two workers) against collection1 and
 * checks checkpoint creation, incremental delivery, {@code initialCheckpoint} handling and
 * retrieval of a text field.
 *
 * @throws Exception if indexing, stream construction or reading fails
 */
@Test
public void testParallelTopicStream() throws Exception {
    // The expressions below name collection1 directly, so skip when running against an alias.
    Assume.assumeTrue(!useAlias);
    new UpdateRequest()
        .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1", "subject", "ha ha bla blah0")
        .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2", "subject", "ha ha bla blah2")
        .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3", "subject", "ha ha bla blah3")
        .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4", "subject", "ha ha bla blah4")
        .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5", "subject", "ha ha bla blah5")
        .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6", "subject", "ha ha bla blah6")
        .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7", "subject", "ha ha bla blah7")
        .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8", "subject", "ha ha bla blah8")
        .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9", "subject", "ha ha bla blah9")
        .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10", "subject", "ha ha bla blah10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withFunctionName("topic", TopicStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("parallel", ParallelStream.class)
        .withFunctionName("daemon", DaemonStream.class);
    // Topic 1000000 is run twice: once to establish checkpoints, once to pick up new documents.
    String checkpointedTopic = "parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))";
    TupleStream stream;
    List<Tuple> tuples;
    SolrClientCache cache = new SolrClientCache();
    try {
        //Store checkpoints in the same index as the main documents. This is perfectly valid
        stream = constructWithFreshContext(factory, StreamExpressionParser.parse(checkpointedTopic), cache);
        tuples = getTuples(stream);
        //Should be zero because the checkpoints will be set to the highest version on the shards.
        assertEquals(0, tuples.size());
        cluster.getSolrClient().commit("collection1");
        //Now check to see if the checkpoints are present (one checkpoint document per worker)
        stream = constructWithFreshContext(factory, StreamExpressionParser.parse("search(collection1, q=\"id:1000000*\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")"), cache);
        tuples = getTuples(stream);
        assertEquals(2, tuples.size());
        List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
        assertEquals(2, checkpoints.size());
        assertEquals("1000000_0", tuples.get(0).getString("id"));
        assertEquals("1000000_1", tuples.get(1).getString("id"));
        //Index a few more documents
        new UpdateRequest().add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        // Re-running the same topic should deliver only the documents added since the checkpoint.
        stream = constructWithFreshContext(factory, StreamExpressionParser.parse(checkpointedTopic), cache);
        assertTopicRun(stream, "10", "11");
        //Test with initial checkpoint. This should pull all
        StreamExpression initialCheckpointTopic = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"2000000\", " + "initialCheckpoint=\"0\", " + "partitionKeys=\"id\"))");
        stream = constructWithFreshContext(factory, initialCheckpointTopic, cache);
        assertTopicRun(stream, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11");
        //Index a few more documents
        new UpdateRequest().add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        //Run the same topic again including the initialCheckpoint. It should start where it left off.
        //initialCheckpoint should be ignored for all but the first run.
        stream = constructWithFreshContext(factory, initialCheckpointTopic, cache);
        assertTopicRun(stream, "12", "13");
        //Test text extraction
        stream = constructWithFreshContext(factory, StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"subject:bla\", " + "fl=\"subject\", " + "id=\"3000000\", " + "initialCheckpoint=\"0\", " + "partitionKeys=\"id\"))"), cache);
        assertTopicSubject(stream, "ha ha bla blah0", "ha ha bla blah1", "ha ha bla blah2", "ha ha bla blah3", "ha ha bla blah4", "ha ha bla blah5", "ha ha bla blah6", "ha ha bla blah7", "ha ha bla blah8", "ha ha bla blah9", "ha ha bla blah10");
    } finally {
        cache.close();
    }
}

/**
 * Builds a stream from {@code expression} and attaches a new {@link StreamContext} that shares
 * the given client cache.
 */
private TupleStream constructWithFreshContext(StreamFactory factory, StreamExpression expression, SolrClientCache cache) throws Exception {
    TupleStream stream = factory.constructStream(expression);
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    return stream;
}
Also used : StreamExpression(org.apache.solr.client.solrj.io.stream.expr.StreamExpression) UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 43 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class StreamExpressionTest method testClassifyStream.

/**
 * Trains a text-logit model into modelCollection, then classifies documents from
 * uknownCollection through the {@code /stream} handler: first with the initial model, then
 * after retraining with inverted labels, and finally via a {@code parallel} expression.
 *
 * @throws Exception if collection administration, indexing or streaming fails
 */
@Test
public void testClassifyStream() throws Exception {
    // The expressions below name collection1 directly, so skip when running against an alias.
    Assume.assumeTrue(!useAlias);
    CollectionAdminRequest.createCollection("modelCollection", "ml", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("modelCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    CollectionAdminRequest.createCollection("uknownCollection", "ml", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("uknownCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    CollectionAdminRequest.createCollection("checkpointCollection", "ml", 2, 1).process(cluster.getSolrClient());
    AbstractDistribZkTestBase.waitForRecoveriesToFinish("checkpointCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
    try {
        // Training set: "a b c c d" is labelled 1 and "a b e e f" is labelled 0.
        UpdateRequest updateRequest = new UpdateRequest();
        for (int i = 0; i < 500; i += 2) {
            updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "1");
            updateRequest.add(id, String.valueOf(i + 1), "tv_text", "a b e e f", "out_i", "0");
        }
        updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        updateRequest = new UpdateRequest();
        updateRequest.add(id, String.valueOf(0), "text_s", "a b c c d");
        updateRequest.add(id, String.valueOf(1), "text_s", "a b e e f");
        updateRequest.commit(cluster.getSolrClient(), "uknownCollection");
        String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/" + COLLECTIONORALIAS;
        StreamFactory factory = new StreamFactory()
            .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
            .withCollectionZkHost("modelCollection", cluster.getZkServer().getZkAddress())
            .withCollectionZkHost("uknownCollection", cluster.getZkServer().getZkAddress())
            .withFunctionName("features", FeaturesSelectionStream.class)
            .withFunctionName("train", TextLogitStream.class)
            .withFunctionName("search", CloudSolrStream.class)
            .withFunctionName("update", UpdateStream.class);
        // train the model
        String textLogitExpression = "train(" + "collection1, " + "features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4)," + "q=\"*:*\", " + "name=\"model\", " + "field=\"tv_text\", " + "outcome=\"out_i\", " + "maxIterations=100)";
        TupleStream updateTrainModelStream = factory.constructStream("update(modelCollection, batchSize=5, " + textLogitExpression + ")");
        getTuples(updateTrainModelStream);
        cluster.getSolrClient().commit("modelCollection");
        // classify unknown documents
        String expr = "classify(" + "model(modelCollection, id=\"model\", cacheMillis=5000)," + "topic(checkpointCollection, uknownCollection, q=\"*:*\", fl=\"text_s, id\", id=\"1000000\", initialCheckpoint=\"0\")," + "field=\"text_s\"," + "analyzerField=\"tv_text\")";
        ModifiableSolrParams paramsLoc = new ModifiableSolrParams();
        paramsLoc.set("expr", expr);
        paramsLoc.set("qt", "/stream");
        SolrStream classifyStream = new SolrStream(url, paramsLoc);
        Map<String, Double> idToLabel = getIdToLabel(classifyStream, "probability_d");
        assertEquals(2, idToLabel.size());
        assertEquals(1.0, idToLabel.get("0"), 0.001);
        assertEquals(0, idToLabel.get("1"), 0.001);
        // Add more documents and classify them; each run is expected to return only the new pair
        updateRequest = new UpdateRequest();
        updateRequest.add(id, String.valueOf(2), "text_s", "a b c c d");
        updateRequest.add(id, String.valueOf(3), "text_s", "a b e e f");
        updateRequest.commit(cluster.getSolrClient(), "uknownCollection");
        classifyStream = new SolrStream(url, paramsLoc);
        idToLabel = getIdToLabel(classifyStream, "probability_d");
        assertEquals(2, idToLabel.size());
        assertEquals(1.0, idToLabel.get("2"), 0.001);
        assertEquals(0, idToLabel.get("3"), 0.001);
        // Train another model, this time with the labels inverted
        updateRequest = new UpdateRequest();
        updateRequest.deleteByQuery("*:*");
        updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        updateRequest = new UpdateRequest();
        for (int i = 0; i < 500; i += 2) {
            updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "0");
            updateRequest.add(id, String.valueOf(i + 1), "tv_text", "a b e e f", "out_i", "1");
        }
        updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
        updateTrainModelStream = factory.constructStream("update(modelCollection, batchSize=5, " + textLogitExpression + ")");
        getTuples(updateTrainModelStream);
        cluster.getSolrClient().commit("modelCollection");
        // Add more documents and classify them
        updateRequest = new UpdateRequest();
        updateRequest.add(id, String.valueOf(4), "text_s", "a b c c d");
        updateRequest.add(id, String.valueOf(5), "text_s", "a b e e f");
        updateRequest.commit(cluster.getSolrClient(), "uknownCollection");
        //Sleep for just over 5 seconds to let the model cache (cacheMillis=5000) expire
        Thread.sleep(5100);
        classifyStream = new SolrStream(url, paramsLoc);
        idToLabel = getIdToLabel(classifyStream, "probability_d");
        assertEquals(2, idToLabel.size());
        assertEquals(0, idToLabel.get("4"), 0.001);
        assertEquals(1.0, idToLabel.get("5"), 0.001);
        //Classify in parallel
        // classify unknown documents
        expr = "parallel(collection1, workers=2, sort=\"_version_ asc\", classify(" + "model(modelCollection, id=\"model\")," + "topic(checkpointCollection, uknownCollection, q=\"id:(4 5)\", fl=\"text_s, id, _version_\", id=\"2000000\", partitionKeys=\"id\", initialCheckpoint=\"0\")," + "field=\"text_s\"," + "analyzerField=\"tv_text\"))";
        paramsLoc.set("expr", expr);
        classifyStream = new SolrStream(url, paramsLoc);
        idToLabel = getIdToLabel(classifyStream, "probability_d");
        assertEquals(2, idToLabel.size());
        assertEquals(0, idToLabel.get("4"), 0.001);
        assertEquals(1.0, idToLabel.get("5"), 0.001);
    } finally {
        // Remove the auxiliary collections even when an assertion above fails, so they do not
        // linger in the shared test cluster.
        CollectionAdminRequest.deleteCollection("modelCollection").process(cluster.getSolrClient());
        CollectionAdminRequest.deleteCollection("uknownCollection").process(cluster.getSolrClient());
        CollectionAdminRequest.deleteCollection("checkpointCollection").process(cluster.getSolrClient());
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) Test(org.junit.Test)

Example 44 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class JDBCStreamTest method testJDBCSolrInnerJoinRollupExpression.

/**
 * Hash-joins Solr rating documents with PEOPLE/COUNTRIES rows read over JDBC and rolls the
 * result up per country, checking the max, min, avg and count metrics.
 *
 * @throws Exception if loading the database or Solr data, or reading the stream, fails
 */
@Test
public void testJDBCSolrInnerJoinRollupExpression() throws Exception {
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("select", SelectStream.class)
        .withFunctionName("hashJoin", HashJoinStream.class)
        .withFunctionName("rollup", RollupStream.class)
        .withFunctionName("jdbc", JDBCStream.class)
        .withFunctionName("max", MaxMetric.class)
        .withFunctionName("min", MinMetric.class)
        .withFunctionName("avg", MeanMetric.class)
        .withFunctionName("count", CountMetric.class);
    // Load Database Data
    try (Connection connection = DriverManager.getConnection("jdbc:hsqldb:mem:.");
        Statement statement = connection.createStatement()) {
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('US', 'United States')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NL', 'Netherlands')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NP', 'Nepal')");
        statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NO', 'Norway')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (11,'Emma','NL')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (12,'Grace','US')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (13,'Hailey','NL')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (14,'Isabella','NL')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (15,'Lily','NL')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (16,'Madison','US')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (17,'Mia','US')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (18,'Natalie','NL')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (19,'Olivia','NL')");
        statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (20,'Samantha','US')");
    }
    // Load solr data: one rating per person, personId_i matching PEOPLE.ID
    new UpdateRequest()
        .add(id, "1", "rating_f", "3.5", "personId_i", "11")
        .add(id, "3", "rating_f", "2.2", "personId_i", "13")
        .add(id, "4", "rating_f", "4.3", "personId_i", "14")
        .add(id, "5", "rating_f", "3.5", "personId_i", "15")
        .add(id, "8", "rating_f", "4", "personId_i", "18")
        .add(id, "9", "rating_f", "4.1", "personId_i", "19")
        .add(id, "2", "rating_f", "5", "personId_i", "12")
        .add(id, "6", "rating_f", "3", "personId_i", "16")
        .add(id, "7", "rating_f", "3", "personId_i", "17")
        .add(id, "10", "rating_f", "4.8", "personId_i", "20")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    // Tolerance for the floating-point metric comparisons below.
    final double delta = 0.001D;
    String expression;
    TupleStream stream;
    List<Tuple> tuples;
    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        // Basic test
        expression = "rollup("
            + "  hashJoin("
            + "    hashed=select("
            + "      search(" + COLLECTIONORALIAS + ", fl=\"personId_i,rating_f\", q=\"rating_f:*\", sort=\"personId_i asc\"),"
            + "      personId_i as personId,"
            + "      rating_f as rating"
            + "    ),"
            + "    select("
            + "      jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by COUNTRIES.COUNTRY_NAME\", sort=\"COUNTRIES.COUNTRY_NAME asc\"),"
            + "      ID as personId,"
            + "      NAME as personName,"
            + "      COUNTRY_NAME as country"
            + "    ),"
            + "    on=\"personId\""
            + "  ),"
            + "  over=\"country\","
            + "  max(rating),"
            + "  min(rating),"
            + "  avg(rating),"
            + "  count(*)"
            + ")";
        stream = factory.constructStream(expression);
        stream.setStreamContext(streamContext);
        tuples = getTuples(stream);
        // Only the two countries that have people (NL and US) produce a rollup bucket.
        assertEquals(2, tuples.size());
        Tuple tuple = tuples.get(0);
        assertEquals("Netherlands", tuple.getString("country"));
        assertEquals(4.3D, tuple.getDouble("max(rating)").doubleValue(), delta);
        assertEquals(2.2D, tuple.getDouble("min(rating)").doubleValue(), delta);
        assertEquals(3.6D, tuple.getDouble("avg(rating)").doubleValue(), delta);
        assertEquals(6D, tuple.getDouble("count(*)").doubleValue(), delta);
        tuple = tuples.get(1);
        assertEquals("United States", tuple.getString("country"));
        assertEquals(5D, tuple.getDouble("max(rating)").doubleValue(), delta);
        assertEquals(3D, tuple.getDouble("min(rating)").doubleValue(), delta);
        assertEquals(3.95D, tuple.getDouble("avg(rating)").doubleValue(), delta);
        assertEquals(4D, tuple.getDouble("count(*)").doubleValue(), delta);
    } finally {
        solrClientCache.close();
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) Statement(java.sql.Statement) Connection(java.sql.Connection) MaxMetric(org.apache.solr.client.solrj.io.stream.metrics.MaxMetric) MeanMetric(org.apache.solr.client.solrj.io.stream.metrics.MeanMetric) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Example 45 with StreamFactory

use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.

the class JDBCStreamTest method testJDBCSolrInnerJoinExpressionWithProperties.

/**
 * Inner-joins Solr rating documents with PEOPLE/COUNTRIES rows read over JDBC, once selecting
 * PEOPLE.ID under its own name and once under the SQL alias PERSONID, and checks that both
 * variants yield the same ten joined tuples in personId order.
 */
@Test
public void testJDBCSolrInnerJoinExpressionWithProperties() throws Exception {
    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("select", SelectStream.class)
        .withFunctionName("innerJoin", InnerJoinStream.class)
        .withFunctionName("jdbc", JDBCStream.class);

    // Load Database Data: countries first, then the people that reference them.
    String[] inserts = {
        "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('US', 'United States')",
        "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NL', 'Netherlands')",
        "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NP', 'Nepal')",
        "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NO', 'Norway')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (11,'Emma','NL')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (12,'Grace','US')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (13,'Hailey','NL')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (14,'Isabella','NL')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (15,'Lily','NL')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (16,'Madison','US')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (17,'Mia','US')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (18,'Natalie','NL')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (19,'Olivia','NL')",
        "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (20,'Samantha','US')"
    };
    try (Connection connection = DriverManager.getConnection("jdbc:hsqldb:mem:.");
        Statement statement = connection.createStatement()) {
        for (String sql : inserts) {
            statement.executeUpdate(sql);
        }
    }

    // Load solr data: {document id, rating_f, personId_i}
    String[][] ratingRows = {
        {"1", "3.5", "11"},
        {"2", "5", "12"},
        {"3", "2.2", "13"},
        {"4", "4.3", "14"},
        {"5", "3.5", "15"},
        {"6", "3", "16"},
        {"7", "3", "17"},
        {"8", "4", "18"},
        {"9", "4.1", "19"},
        {"10", "4.8", "20"}
    };
    UpdateRequest ratings = new UpdateRequest();
    for (String[] row : ratingRows) {
        ratings.add(id, row[0], "rating_f", row[1], "personId_i", row[2]);
    }
    ratings.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    // Both expressions share the Solr side of the join and the trailing column selection;
    // they differ only in how the JDBC side names the person id column.
    String solrSide = "innerJoin(" + "  select(" + "    search(" + COLLECTIONORALIAS + ", fl=\"personId_i,rating_f\", q=\"rating_f:*\", sort=\"personId_i asc\")," + "    personId_i as personId," + "    rating_f as rating" + "  )," + "  select(";
    String joinTail = "    NAME as personName," + "    COUNTRY_NAME as country" + "  )," + "  on=\"personId\"" + ")";
    // Basic test for no alias
    String withoutAlias = solrSide + "    jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by PEOPLE.ID\", sort=\"ID asc\")," + "    ID as personId," + joinTail;
    // Basic test for alias
    String withAlias = solrSide + "    jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID as PERSONID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by PEOPLE.ID\", sort=\"PERSONID asc\")," + "    PERSONID as personId," + joinTail;

    StreamContext streamContext = new StreamContext();
    SolrClientCache solrClientCache = new SolrClientCache();
    streamContext.setSolrClientCache(solrClientCache);
    try {
        for (String expression : new String[] {withoutAlias, withAlias}) {
            TupleStream joined = factory.constructStream(expression);
            joined.setStreamContext(streamContext);
            List<Tuple> rows = getTuples(joined);
            assertEquals(10, rows.size());
            assertOrderOf(rows, "personId", 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
            assertOrderOf(rows, "rating", 3.5d, 5d, 2.2d, 4.3d, 3.5d, 3d, 3d, 4d, 4.1d, 4.8d);
            assertOrderOf(rows, "personName", "Emma", "Grace", "Hailey", "Isabella", "Lily", "Madison", "Mia", "Natalie", "Olivia", "Samantha");
            assertOrderOf(rows, "country", "Netherlands", "United States", "Netherlands", "Netherlands", "Netherlands", "United States", "United States", "Netherlands", "Netherlands", "United States");
        }
    } finally {
        solrClientCache.close();
    }
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) Statement(java.sql.Statement) StreamFactory(org.apache.solr.client.solrj.io.stream.expr.StreamFactory) Connection(java.sql.Connection) SolrClientCache(org.apache.solr.client.solrj.io.SolrClientCache) Tuple(org.apache.solr.client.solrj.io.Tuple) Test(org.junit.Test)

Aggregations

StreamFactory (org.apache.solr.client.solrj.io.stream.expr.StreamFactory)69 Tuple (org.apache.solr.client.solrj.io.Tuple)65 UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)65 Test (org.junit.Test)64 SolrClientCache (org.apache.solr.client.solrj.io.SolrClientCache)61 StreamExpression (org.apache.solr.client.solrj.io.stream.expr.StreamExpression)37 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)14 MeanMetric (org.apache.solr.client.solrj.io.stream.metrics.MeanMetric)10 FieldComparator (org.apache.solr.client.solrj.io.comp.FieldComparator)9 MinMetric (org.apache.solr.client.solrj.io.stream.metrics.MinMetric)9 IOException (java.io.IOException)8 HashMap (java.util.HashMap)7 StreamContext (org.apache.solr.client.solrj.io.stream.StreamContext)6 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 Connection (java.sql.Connection)4 Statement (java.sql.Statement)4 List (java.util.List)4 JettySolrRunner (org.apache.solr.client.solrj.embedded.JettySolrRunner)4 AndEvaluator (org.apache.solr.client.solrj.io.eval.AndEvaluator)3