Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
From the class JDBCStreamTest, method testJDBCSolrInnerJoinExpression.
@Test
public void testJDBCSolrInnerJoinExpression() throws Exception {
  StreamFactory factory = new StreamFactory()
      .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
      .withFunctionName("search", CloudSolrStream.class)
      .withFunctionName("select", SelectStream.class)
      .withFunctionName("innerJoin", InnerJoinStream.class)
      .withFunctionName("jdbc", JDBCStream.class);
  // Load database data
  try (Connection connection = DriverManager.getConnection("jdbc:hsqldb:mem:.");
       Statement statement = connection.createStatement()) {
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('US', 'United States')");
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NL', 'Netherlands')");
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NP', 'Nepal')");
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NO', 'Norway')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (11,'Emma','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (12,'Grace','US')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (13,'Hailey','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (14,'Isabella','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (15,'Lily','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (16,'Madison','US')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (17,'Mia','US')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (18,'Natalie','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (19,'Olivia','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (20,'Samantha','US')");
  }
  // Load Solr data
  new UpdateRequest()
      .add(id, "1", "rating_f", "3.5", "personId_i", "11")
      .add(id, "2", "rating_f", "5", "personId_i", "12")
      .add(id, "3", "rating_f", "2.2", "personId_i", "13")
      .add(id, "4", "rating_f", "4.3", "personId_i", "14")
      .add(id, "5", "rating_f", "3.5", "personId_i", "15")
      .add(id, "6", "rating_f", "3", "personId_i", "16")
      .add(id, "7", "rating_f", "3", "personId_i", "17")
      .add(id, "8", "rating_f", "4", "personId_i", "18")
      .add(id, "9", "rating_f", "4.1", "personId_i", "19")
      .add(id, "10", "rating_f", "4.8", "personId_i", "20")
      .commit(cluster.getSolrClient(), COLLECTIONORALIAS);
  String expression;
  TupleStream stream;
  List<Tuple> tuples;
  StreamContext streamContext = new StreamContext();
  SolrClientCache solrClientCache = new SolrClientCache();
  streamContext.setSolrClientCache(solrClientCache);
  try {
    // Basic test
    expression = "innerJoin("
        + " select("
        + " search(" + COLLECTIONORALIAS + ", fl=\"personId_i,rating_f\", q=\"rating_f:*\", sort=\"personId_i asc\"),"
        + " personId_i as personId,"
        + " rating_f as rating"
        + " ),"
        + " select("
        + " jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by PEOPLE.ID\", sort=\"ID asc\"),"
        + " ID as personId,"
        + " NAME as personName,"
        + " COUNTRY_NAME as country"
        + " ),"
        + " on=\"personId\""
        + ")";
    stream = factory.constructStream(expression);
    stream.setStreamContext(streamContext);
    tuples = getTuples(stream);
    assertEquals(10, tuples.size());
    assertOrderOf(tuples, "personId", 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
    assertOrderOf(tuples, "rating", 3.5d, 5d, 2.2d, 4.3d, 3.5d, 3d, 3d, 4d, 4.1d, 4.8d);
    assertOrderOf(tuples, "personName", "Emma", "Grace", "Hailey", "Isabella", "Lily", "Madison", "Mia", "Natalie", "Olivia", "Samantha");
    assertOrderOf(tuples, "country", "Netherlands", "United States", "Netherlands", "Netherlands", "Netherlands", "United States", "United States", "Netherlands", "Netherlands", "United States");
  } finally {
    solrClientCache.close();
  }
}
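The inserts above assume the COUNTRIES and PEOPLE tables already exist; the test suite presumably creates them in a setup method that is not part of this snippet. A plausible sketch of that setup, with column types assumed rather than taken from the source:

// Hypothetical schema setup (not shown in this snippet; column types are assumed).
// In the real suite this would live in a @Before/@BeforeClass method.
try (Connection connection = DriverManager.getConnection("jdbc:hsqldb:mem:.");
     Statement statement = connection.createStatement()) {
  statement.executeUpdate("create table COUNTRIES (CODE varchar(3) primary key, COUNTRY_NAME varchar(50))");
  statement.executeUpdate("create table PEOPLE (ID int not null primary key, NAME varchar(50), COUNTRY_CODE varchar(3))");
}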
Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
From the class StreamExpressionTest, method testCloudSolrStream.
@Test
public void testCloudSolrStream() throws Exception {
new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0").add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress());
StreamExpression expression;
CloudSolrStream stream;
List<Tuple> tuples;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
try {
// Basic test
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 5);
assertOrder(tuples, 0, 2, 1, 3, 4);
assertLong(tuples.get(0), "a_i", 0);
// Basic w/aliases
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", aliases=\"a_i=alias.a_i, a_s=name\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 5);
assertOrder(tuples, 0, 2, 1, 3, 4);
assertLong(tuples.get(0), "alias.a_i", 0);
assertString(tuples.get(0), "name", "hello0");
// Basic filtered test
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 3);
assertOrder(tuples, 0, 3, 4);
assertLong(tuples.get(1), "a_i", 3);
try {
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
throw new Exception("Should be an exception here");
} catch (Exception e) {
assertTrue(e.getMessage().contains("q param expected for search function"));
}
try {
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", sort=\"a_f asc, a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
throw new Exception("Should be an exception here");
} catch (Exception e) {
assertTrue(e.getMessage().contains("fl param expected for search function"));
}
try {
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", fl=\"id, a_f\", sort=\"a_f\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
throw new Exception("Should be an exception here");
} catch (Exception e) {
assertTrue(e.getMessage().contains("Invalid sort spec"));
}
    // Test with shards param
    List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext);
    Map<String, List<String>> shardsMap = new HashMap<>();
    shardsMap.put("myCollection", shardUrls);
    StreamContext context = new StreamContext();
    context.put("shards", shardsMap);
    context.setSolrClientCache(solrClientCache);
    // Basic test
    expression = StreamExpressionParser.parse("search(myCollection, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
    stream = new CloudSolrStream(expression, factory);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assert (tuples.size() == 5);
    assertOrder(tuples, 0, 2, 1, 3, 4);
    assertLong(tuples.get(0), "a_i", 0);
    // Exercise the /stream handler
    // Add the shards http parameter for myCollection
    StringBuilder buf = new StringBuilder();
    for (String shardUrl : shardUrls) {
      if (buf.length() > 0) {
        buf.append(",");
      }
      buf.append(shardUrl);
    }
    ModifiableSolrParams solrParams = new ModifiableSolrParams();
    solrParams.add("qt", "/stream");
    solrParams.add("expr", "search(myCollection, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
    solrParams.add("myCollection.shards", buf.toString());
    SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams);
    solrStream.setStreamContext(context);
    tuples = getTuples(solrStream);
    assert (tuples.size() == 5);
    assertOrder(tuples, 0, 2, 1, 3, 4);
    assertLong(tuples.get(0), "a_i", 0);
  } finally {
    solrClientCache.close();
  }
}
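These tests all rely on a getTuples helper defined in the test class rather than shown here. A minimal sketch of the usual open/read-until-EOF/close loop such a helper performs (an assumption, not the verbatim helper):

// Sketch of the getTuples helper assumed by these tests: open the stream,
// collect tuples until the EOF marker, and always close the stream.
protected List<Tuple> getTuples(TupleStream tupleStream) throws IOException {
  List<Tuple> tuples = new ArrayList<>();
  try {
    tupleStream.open();
    for (Tuple t = tupleStream.read(); !t.EOF; t = tupleStream.read()) {
      tuples.add(t);
    }
  } finally {
    tupleStream.close();
  }
  return tuples;
}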
Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
From the class StreamExpressionTest, method testParallelExecutorStream.
@Test
public void testParallelExecutorStream() throws Exception {
CollectionAdminRequest.createCollection("workQueue", "conf", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish("workQueue", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
CollectionAdminRequest.createCollection("mainCorpus", "conf", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish("mainCorpus", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
CollectionAdminRequest.createCollection("destination", "conf", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish("destination", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
UpdateRequest workRequest = new UpdateRequest();
UpdateRequest dataRequest = new UpdateRequest();
for (int i = 0; i < 500; i++) {
workRequest.add(id, String.valueOf(i), "expr_s", "update(destination, batchSize=50, search(mainCorpus, q=id:" + i + ", rows=1, sort=\"id asc\", fl=\"id, body_t, field_i\"))");
dataRequest.add(id, String.valueOf(i), "body_t", "hello world " + i, "field_i", Integer.toString(i));
}
workRequest.commit(cluster.getSolrClient(), "workQueue");
dataRequest.commit(cluster.getSolrClient(), "mainCorpus");
String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/destination";
TupleStream executorStream;
ModifiableSolrParams paramsLoc;
StreamFactory factory = new StreamFactory().withCollectionZkHost("workQueue", cluster.getZkServer().getZkAddress()).withCollectionZkHost("mainCorpus", cluster.getZkServer().getZkAddress()).withCollectionZkHost("destination", cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("executor", ExecutorStream.class).withFunctionName("parallel", ParallelStream.class).withFunctionName("update", UpdateStream.class);
String executorExpression = "parallel(workQueue, workers=2, sort=\"EOF asc\", executor(threads=3, queueSize=100, search(workQueue, q=\"*:*\", fl=\"id, expr_s\", rows=1000, partitionKeys=id, sort=\"id desc\")))";
executorStream = factory.constructStream(executorExpression);
StreamContext context = new StreamContext();
SolrClientCache clientCache = new SolrClientCache();
context.setSolrClientCache(clientCache);
executorStream.setStreamContext(context);
getTuples(executorStream);
//Destination collection should now contain all the records in the main corpus.
cluster.getSolrClient().commit("destination");
paramsLoc = new ModifiableSolrParams();
paramsLoc.set("expr", "search(destination, q=\"*:*\", fl=\"id, body_t, field_i\", rows=1000, sort=\"field_i asc\")");
paramsLoc.set("qt", "/stream");
SolrStream solrStream = new SolrStream(url, paramsLoc);
List<Tuple> tuples = getTuples(solrStream);
assertTrue(tuples.size() == 500);
for (int i = 0; i < 500; i++) {
Tuple tuple = tuples.get(i);
long ivalue = tuple.getLong("field_i");
String body = tuple.getString("body_t");
assertTrue(ivalue == i);
assertTrue(body.equals("hello world " + i));
}
solrStream.close();
clientCache.close();
CollectionAdminRequest.deleteCollection("workQueue").process(cluster.getSolrClient());
CollectionAdminRequest.deleteCollection("mainCorpus").process(cluster.getSolrClient());
CollectionAdminRequest.deleteCollection("destination").process(cluster.getSolrClient());
}
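Each workQueue document stores a complete streaming expression in expr_s, and executor() compiles and runs one expression per tuple on its internal thread pool. A minimal sketch of what that amounts to for a single tuple (an assumption about the mechanism, not ExecutorStream's actual code):

// Sketch: roughly what executor() does for each work-queue tuple, assuming
// expr_s holds a self-contained expression such as the update(...) above.
String expr = tuple.getString("expr_s");
TupleStream task = factory.constructStream(expr);
task.setStreamContext(context);
try {
  task.open();
  while (!task.read().EOF) {
    // Drain the stream; update(...) pushes each batch to the destination collection.
  }
} finally {
  task.close();
}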
Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
From the class StreamExpressionTest, method testParallelRollupStream.
@Test
public void testParallelRollupStream() throws Exception {
new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1").add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5").add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6").add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7").add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8").add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9").add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("parallel", ParallelStream.class).withFunctionName("rollup", RollupStream.class).withFunctionName("sum", SumMetric.class).withFunctionName("min", MinMetric.class).withFunctionName("max", MaxMetric.class).withFunctionName("avg", MeanMetric.class).withFunctionName("count", CountMetric.class);
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
StreamExpression expression;
TupleStream stream;
List<Tuple> tuples;
try {
expression = StreamExpressionParser.parse("parallel(" + COLLECTIONORALIAS + "," + "rollup(" + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"a_s,a_i,a_f\", sort=\"a_s asc\", partitionKeys=\"a_s\")," + "over=\"a_s\"," + "sum(a_i)," + "sum(a_f)," + "min(a_i)," + "min(a_f)," + "max(a_i)," + "max(a_f)," + "avg(a_i)," + "avg(a_f)," + "count(*)" + ")," + "workers=\"2\", zkHost=\"" + cluster.getZkServer().getZkAddress() + "\", sort=\"a_s asc\")");
stream = factory.constructStream(expression);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 3);
//Test Long and Double Sums
Tuple tuple = tuples.get(0);
String bucket = tuple.getString("a_s");
Double sumi = tuple.getDouble("sum(a_i)");
Double sumf = tuple.getDouble("sum(a_f)");
Double mini = tuple.getDouble("min(a_i)");
Double minf = tuple.getDouble("min(a_f)");
Double maxi = tuple.getDouble("max(a_i)");
Double maxf = tuple.getDouble("max(a_f)");
Double avgi = tuple.getDouble("avg(a_i)");
Double avgf = tuple.getDouble("avg(a_f)");
Double count = tuple.getDouble("count(*)");
assertTrue(bucket.equals("hello0"));
assertTrue(sumi.doubleValue() == 17.0D);
assertTrue(sumf.doubleValue() == 18.0D);
assertTrue(mini.doubleValue() == 0.0D);
assertTrue(minf.doubleValue() == 1.0D);
assertTrue(maxi.doubleValue() == 14.0D);
assertTrue(maxf.doubleValue() == 10.0D);
assertTrue(avgi.doubleValue() == 4.25D);
assertTrue(avgf.doubleValue() == 4.5D);
assertTrue(count.doubleValue() == 4);
tuple = tuples.get(1);
bucket = tuple.getString("a_s");
sumi = tuple.getDouble("sum(a_i)");
sumf = tuple.getDouble("sum(a_f)");
mini = tuple.getDouble("min(a_i)");
minf = tuple.getDouble("min(a_f)");
maxi = tuple.getDouble("max(a_i)");
maxf = tuple.getDouble("max(a_f)");
avgi = tuple.getDouble("avg(a_i)");
avgf = tuple.getDouble("avg(a_f)");
count = tuple.getDouble("count(*)");
assertTrue(bucket.equals("hello3"));
assertTrue(sumi.doubleValue() == 38.0D);
assertTrue(sumf.doubleValue() == 26.0D);
assertTrue(mini.doubleValue() == 3.0D);
assertTrue(minf.doubleValue() == 3.0D);
assertTrue(maxi.doubleValue() == 13.0D);
assertTrue(maxf.doubleValue() == 9.0D);
assertTrue(avgi.doubleValue() == 9.5D);
assertTrue(avgf.doubleValue() == 6.5D);
assertTrue(count.doubleValue() == 4);
tuple = tuples.get(2);
bucket = tuple.getString("a_s");
sumi = tuple.getDouble("sum(a_i)");
sumf = tuple.getDouble("sum(a_f)");
mini = tuple.getDouble("min(a_i)");
minf = tuple.getDouble("min(a_f)");
maxi = tuple.getDouble("max(a_i)");
maxf = tuple.getDouble("max(a_f)");
avgi = tuple.getDouble("avg(a_i)");
avgf = tuple.getDouble("avg(a_f)");
count = tuple.getDouble("count(*)");
assertTrue(bucket.equals("hello4"));
    assertTrue(sumi.doubleValue() == 15.0D);
    assertTrue(sumf.doubleValue() == 11.0D);
    assertTrue(mini.doubleValue() == 4.0D);
    assertTrue(minf.doubleValue() == 4.0D);
    assertTrue(maxi.doubleValue() == 11.0D);
    assertTrue(maxf.doubleValue() == 7.0D);
    assertTrue(avgi.doubleValue() == 7.5D);
    assertTrue(avgf.doubleValue() == 5.5D);
    assertTrue(count.doubleValue() == 2);
  } finally {
    solrClientCache.close();
  }
}
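The parallel() wrapper here shuffles tuples to two workers on partitionKeys="a_s" and merges their results on sort="a_s asc". For comparison, a sketch of the same rollup run on a single node, which drops the parallel wrapper and the partitionKeys parameter (not part of the original test):

// Sketch: the equivalent single-worker rollup; the same factory registrations apply.
String serialExpression = "rollup("
    + "search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"a_s,a_i,a_f\", sort=\"a_s asc\"),"
    + "over=\"a_s\","
    + "sum(a_i),sum(a_f),min(a_i),min(a_f),max(a_i),max(a_f),avg(a_i),avg(a_f),count(*)"
    + ")";
TupleStream serialStream = factory.constructStream(serialExpression);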
Use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
From the class SelectWithEvaluatorsTest, method testSelectWithEvaluatorsStream.
@Test
public void testSelectWithEvaluatorsStream() throws Exception {
new UpdateRequest().add(id, "1", "a_s", "foo", "b_i", "1", "c_d", "3.3", "d_b", "true").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
String clause;
TupleStream stream;
List<Tuple> tuples;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
StreamFactory factory = new StreamFactory().withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("select", SelectStream.class).withFunctionName("add", AddEvaluator.class).withFunctionName("if", IfThenElseEvaluator.class).withFunctionName("gt", GreaterThanEvaluator.class);
try {
// Basic test
clause = "select(" + "id," + "add(b_i,c_d) as result," + "search(collection1, q=*:*, fl=\"id,a_s,b_i,c_d,d_b\", sort=\"id asc\")" + ")";
stream = factory.constructStream(clause);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assertFields(tuples, "id", "result");
assertNotFields(tuples, "a_s", "b_i", "c_d", "d_b");
assertEquals(1, tuples.size());
assertDouble(tuples.get(0), "result", 4.3);
assertEquals(4.3, tuples.get(0).get("result"));
} finally {
solrClientCache.close();
}
}
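The factory above also registers the if and gt evaluators, which the basic clause never exercises. A hedged sketch of a clause that could use them, assuming the same single test document (b_i=1, c_d=3.3); this is illustrative, not from the source test:

// Sketch (hypothetical): gt(b_i,5) is false for b_i=1, so the if(...)
// evaluator should fall through to c_d, yielding result=3.3.
clause = "select("
    + "id,"
    + "if(gt(b_i,5), b_i, c_d) as result,"
    + "search(collection1, q=*:*, fl=\"id,b_i,c_d\", sort=\"id asc\")"
    + ")";
stream = factory.constructStream(clause);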