use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
the class StreamExpressionTest method testDaemonStream.
/**
 * Runs a {@code daemon} stream that wraps a {@code rollup} over {@code a_s} summing {@code a_i},
 * and checks the per-bucket sums read from the daemon's queue both before and after an extra
 * document is indexed.
 *
 * @throws Exception if indexing, stream construction or reading fails, or if the test thread is
 *     interrupted while waiting for the daemon's queue to fill
 */
@Test
public void testDaemonStream() throws Exception {
  // Ten documents in three a_s buckets: hello0 (a_i sum 17), hello3 (38), hello4 (15).
  new UpdateRequest()
      .add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1")
      .add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2")
      .add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3")
      .add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4")
      .add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5")
      .add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6")
      .add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7")
      .add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8")
      .add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9")
      .add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10")
      .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

  StreamFactory factory = new StreamFactory()
      .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
      .withFunctionName("search", CloudSolrStream.class)
      .withFunctionName("rollup", RollupStream.class)
      .withFunctionName("sum", SumMetric.class)
      .withFunctionName("min", MinMetric.class)
      .withFunctionName("max", MaxMetric.class)
      .withFunctionName("avg", MeanMetric.class)
      .withFunctionName("count", CountMetric.class)
      .withFunctionName("daemon", DaemonStream.class);

  StreamExpression expression = StreamExpressionParser.parse("daemon(rollup("
      + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"a_i,a_s\", sort=\"a_s asc\"),"
      + "over=\"a_s\","
      + "sum(a_i)"
      + "), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
  DaemonStream daemonStream = (DaemonStream) factory.constructStream(expression);

  StreamContext streamContext = new StreamContext();
  SolrClientCache solrClientCache = new SolrClientCache();
  streamContext.setSolrClientCache(solrClientCache);
  daemonStream.setStreamContext(streamContext);

  try {
    // Test Long and Double Sums
    // This will start the daemon thread
    daemonStream.open();

    for (int i = 0; i < 4; i++) {
      assertDaemonRollupRun(daemonStream, 17.0D);
    }

    // Wait until the daemon has filled its queue again. An interrupt is allowed to propagate and
    // fail the test instead of being swallowed, which would leave this loop spinning.
    while (daemonStream.remainingCapacity() > 0) {
      Thread.sleep(1000);
    }

    // OK capacity is full, let's index a new doc
    new UpdateRequest()
        .add(id, "10", "a_s", "hello0", "a_i", "1", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    // Discard 12 tuples: enough to empty the 9-slot queue plus one further run of three, so the
    // reads below should come from a run that has seen the new document.
    for (int i = 0; i < 12; i++) {
      daemonStream.read();
    }

    // And rerun the loop. It should have a new count for hello0
    for (int i = 0; i < 4; i++) {
      assertDaemonRollupRun(daemonStream, 18.0D);
    }
  } finally {
    // This should stop the daemon thread
    daemonStream.close();
    solrClientCache.close();
  }
}

/**
 * Reads three consecutive tuples from the daemon's queue and checks that they are the
 * hello0, hello3 and hello4 buckets, in that order, with the expected {@code sum(a_i)} values.
 *
 * @param daemonStream an already opened daemon stream
 * @param expectedHello0Sum the sum expected for the hello0 bucket (the only one this test changes)
 */
private void assertDaemonRollupRun(DaemonStream daemonStream, double expectedHello0Sum) throws Exception {
  // Reads from the queue
  Tuple tuple = daemonStream.read();
  assertEquals("hello0", tuple.getString("a_s"));
  // Integer-valued sums are exactly representable, so a zero delta keeps the comparison exact.
  assertEquals(expectedHello0Sum, tuple.getDouble("sum(a_i)").doubleValue(), 0.0D);

  tuple = daemonStream.read();
  assertEquals("hello3", tuple.getString("a_s"));
  assertEquals(38.0D, tuple.getDouble("sum(a_i)").doubleValue(), 0.0D);

  tuple = daemonStream.read();
  assertEquals("hello4", tuple.getString("a_s"));
  // The last bucket is deliberately checked through its long view.
  assertEquals(15L, tuple.getDouble("sum(a_i)").longValue());
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
the class StreamExpressionTest method testParallelTopicStream.
/**
 * Runs {@code topic} streams wrapped in {@code parallel} with two workers against collection1
 * and checks which document ids (or subjects) each run delivers as documents are added.
 * Skipped when the test is running against an alias.
 *
 * @throws Exception if indexing, stream construction or reading fails
 */
@Test
public void testParallelTopicStream() throws Exception {
  Assume.assumeTrue(!useAlias);

  new UpdateRequest()
      .add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1", "subject", "ha ha bla blah0")
      .add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2", "subject", "ha ha bla blah2")
      .add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3", "subject", "ha ha bla blah3")
      .add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4", "subject", "ha ha bla blah4")
      .add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5", "subject", "ha ha bla blah5")
      .add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6", "subject", "ha ha bla blah6")
      .add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7", "subject", "ha ha bla blah7")
      .add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8", "subject", "ha ha bla blah8")
      .add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9", "subject", "ha ha bla blah9")
      .add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10", "subject", "ha ha bla blah10")
      .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

  StreamFactory factory = new StreamFactory()
      .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
      .withFunctionName("topic", TopicStream.class)
      .withFunctionName("search", CloudSolrStream.class)
      .withFunctionName("parallel", ParallelStream.class)
      .withFunctionName("daemon", DaemonStream.class);

  StreamExpression expression;
  TupleStream stream;
  List<Tuple> tuples;

  SolrClientCache cache = new SolrClientCache();

  try {
    // Store checkpoints in the same index as the main documents. This is perfectly valid
    expression = StreamExpressionParser.parse("parallel(collection1, "
        + "workers=\"2\", "
        + "sort=\"_version_ asc\","
        + "topic(collection1, "
        + "collection1, "
        + "q=\"a_s:hello\", "
        + "fl=\"id\", "
        + "id=\"1000000\", "
        + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    // Should be zero because the checkpoints will be set to the highest version on the shards.
    assertEquals(0, tuples.size());

    cluster.getSolrClient().commit("collection1");

    // Now check to see if the checkpoints are present
    expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000*\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);

    // One checkpoint document per worker, each carrying two checkpoint entries.
    assertEquals(2, tuples.size());
    List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
    assertEquals(2, checkpoints.size());
    String id1 = tuples.get(0).getString("id");
    String id2 = tuples.get(1).getString("id");
    assertEquals("1000000_0", id1);
    assertEquals("1000000_1", id2);

    // Index a few more documents
    new UpdateRequest()
        .add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9")
        .add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    expression = StreamExpressionParser.parse("parallel(collection1, "
        + "workers=\"2\", "
        + "sort=\"_version_ asc\","
        + "topic(collection1, "
        + "collection1, "
        + "q=\"a_s:hello\", "
        + "fl=\"id\", "
        + "id=\"1000000\", "
        + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);

    assertTopicRun(stream, "10", "11");

    // Test with initial checkpoint. This should pull all
    expression = StreamExpressionParser.parse("parallel(collection1, "
        + "workers=\"2\", "
        + "sort=\"_version_ asc\","
        + "topic(collection1, "
        + "collection1, "
        + "q=\"a_s:hello\", "
        + "fl=\"id\", "
        + "id=\"2000000\", "
        + "initialCheckpoint=\"0\", "
        + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    assertTopicRun(stream, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11");

    // Index a few more documents
    new UpdateRequest()
        .add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9")
        .add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10")
        .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    // Run the same topic again including the initialCheckpoint. It should start where it left off.
    // initialCheckpoint should be ignored for all but the first run.
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    assertTopicRun(stream, "12", "13");

    // Test text extraction
    expression = StreamExpressionParser.parse("parallel(collection1, "
        + "workers=\"2\", "
        + "sort=\"_version_ asc\","
        + "topic(collection1, "
        + "collection1, "
        + "q=\"subject:bla\", "
        + "fl=\"subject\", "
        + "id=\"3000000\", "
        + "initialCheckpoint=\"0\", "
        + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);

    assertTopicSubject(stream, "ha ha bla blah0", "ha ha bla blah1", "ha ha bla blah2", "ha ha bla blah3", "ha ha bla blah4", "ha ha bla blah5", "ha ha bla blah6", "ha ha bla blah7", "ha ha bla blah8", "ha ha bla blah9", "ha ha bla blah10");
  } finally {
    cache.close();
  }
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
the class StreamExpressionTest method testClassifyStream.
/**
 * Trains a text-logit model into modelCollection, then classifies documents from
 * uknownCollection through a {@code classify(model(..), topic(..))} expression sent to the
 * {@code /stream} handler, both directly and wrapped in {@code parallel}. The model is retrained
 * with inverted outcomes midway to check that the cached model is refreshed.
 * Skipped when the test is running against an alias.
 *
 * @throws Exception if collection administration, indexing or streaming fails
 */
@Test
public void testClassifyStream() throws Exception {
  Assume.assumeTrue(!useAlias);

  CollectionAdminRequest.createCollection("modelCollection", "ml", 2, 1).process(cluster.getSolrClient());
  AbstractDistribZkTestBase.waitForRecoveriesToFinish("modelCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
  CollectionAdminRequest.createCollection("uknownCollection", "ml", 2, 1).process(cluster.getSolrClient());
  AbstractDistribZkTestBase.waitForRecoveriesToFinish("uknownCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
  CollectionAdminRequest.createCollection("checkpointCollection", "ml", 2, 1).process(cluster.getSolrClient());
  AbstractDistribZkTestBase.waitForRecoveriesToFinish("checkpointCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);

  // From here on all three collections exist; the finally block removes them even when an
  // assertion fails, so a failed run does not leave them behind in the shared cluster.
  try {
    UpdateRequest updateRequest = new UpdateRequest();
    for (int i = 0; i < 500; i += 2) {
      updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "1");
      updateRequest.add(id, String.valueOf(i + 1), "tv_text", "a b e e f", "out_i", "0");
    }
    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    updateRequest = new UpdateRequest();
    updateRequest.add(id, String.valueOf(0), "text_s", "a b c c d");
    updateRequest.add(id, String.valueOf(1), "text_s", "a b e e f");
    updateRequest.commit(cluster.getSolrClient(), "uknownCollection");

    String url = cluster.getJettySolrRunners().get(0).getBaseUrl().toString() + "/" + COLLECTIONORALIAS;
    TupleStream updateTrainModelStream;
    ModifiableSolrParams paramsLoc;

    StreamFactory factory = new StreamFactory()
        .withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress())
        .withCollectionZkHost("modelCollection", cluster.getZkServer().getZkAddress())
        .withCollectionZkHost("uknownCollection", cluster.getZkServer().getZkAddress())
        .withFunctionName("features", FeaturesSelectionStream.class)
        .withFunctionName("train", TextLogitStream.class)
        .withFunctionName("search", CloudSolrStream.class)
        .withFunctionName("update", UpdateStream.class);

    // train the model
    String textLogitExpression = "train("
        + "collection1, "
        + "features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4),"
        + "q=\"*:*\", "
        + "name=\"model\", "
        + "field=\"tv_text\", "
        + "outcome=\"out_i\", "
        + "maxIterations=100)";
    updateTrainModelStream = factory.constructStream("update(modelCollection, batchSize=5, " + textLogitExpression + ")");
    getTuples(updateTrainModelStream);
    cluster.getSolrClient().commit("modelCollection");

    // classify unknown documents
    String expr = "classify("
        + "model(modelCollection, id=\"model\", cacheMillis=5000),"
        + "topic(checkpointCollection, uknownCollection, q=\"*:*\", fl=\"text_s, id\", id=\"1000000\", initialCheckpoint=\"0\"),"
        + "field=\"text_s\","
        + "analyzerField=\"tv_text\")";

    paramsLoc = new ModifiableSolrParams();
    paramsLoc.set("expr", expr);
    paramsLoc.set("qt", "/stream");
    SolrStream classifyStream = new SolrStream(url, paramsLoc);
    Map<String, Double> idToLabel = getIdToLabel(classifyStream, "probability_d");
    assertEquals(2, idToLabel.size());
    assertEquals(1.0, idToLabel.get("0"), 0.001);
    assertEquals(0, idToLabel.get("1"), 0.001);

    // Add more documents and classify it
    updateRequest = new UpdateRequest();
    updateRequest.add(id, String.valueOf(2), "text_s", "a b c c d");
    updateRequest.add(id, String.valueOf(3), "text_s", "a b e e f");
    updateRequest.commit(cluster.getSolrClient(), "uknownCollection");

    classifyStream = new SolrStream(url, paramsLoc);
    idToLabel = getIdToLabel(classifyStream, "probability_d");
    assertEquals(2, idToLabel.size());
    assertEquals(1.0, idToLabel.get("2"), 0.001);
    assertEquals(0, idToLabel.get("3"), 0.001);

    // Train another model, this time with the outcomes of the two texts swapped
    updateRequest = new UpdateRequest();
    updateRequest.deleteByQuery("*:*");
    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    updateRequest = new UpdateRequest();
    for (int i = 0; i < 500; i += 2) {
      updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "0");
      updateRequest.add(id, String.valueOf(i + 1), "tv_text", "a b e e f", "out_i", "1");
    }
    updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    updateTrainModelStream = factory.constructStream("update(modelCollection, batchSize=5, " + textLogitExpression + ")");
    getTuples(updateTrainModelStream);
    cluster.getSolrClient().commit("modelCollection");

    // Add more documents and classify it
    updateRequest = new UpdateRequest();
    updateRequest.add(id, String.valueOf(4), "text_s", "a b c c d");
    updateRequest.add(id, String.valueOf(5), "text_s", "a b e e f");
    updateRequest.commit(cluster.getSolrClient(), "uknownCollection");

    // Sleep for just over 5 seconds to let the model cache (cacheMillis=5000) expire
    Thread.sleep(5100);

    classifyStream = new SolrStream(url, paramsLoc);
    idToLabel = getIdToLabel(classifyStream, "probability_d");
    assertEquals(2, idToLabel.size());
    assertEquals(0, idToLabel.get("4"), 0.001);
    assertEquals(1.0, idToLabel.get("5"), 0.001);

    // Classify in parallel
    // classify unknown documents
    expr = "parallel(collection1, workers=2, sort=\"_version_ asc\", classify("
        + "model(modelCollection, id=\"model\"),"
        + "topic(checkpointCollection, uknownCollection, q=\"id:(4 5)\", fl=\"text_s, id, _version_\", id=\"2000000\", partitionKeys=\"id\", initialCheckpoint=\"0\"),"
        + "field=\"text_s\","
        + "analyzerField=\"tv_text\"))";
    paramsLoc.set("expr", expr);
    classifyStream = new SolrStream(url, paramsLoc);
    idToLabel = getIdToLabel(classifyStream, "probability_d");
    assertEquals(2, idToLabel.size());
    assertEquals(0, idToLabel.get("4"), 0.001);
    assertEquals(1.0, idToLabel.get("5"), 0.001);
  } finally {
    CollectionAdminRequest.deleteCollection("modelCollection").process(cluster.getSolrClient());
    CollectionAdminRequest.deleteCollection("uknownCollection").process(cluster.getSolrClient());
    CollectionAdminRequest.deleteCollection("checkpointCollection").process(cluster.getSolrClient());
  }
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
the class JDBCStreamTest method testJDBCSolrInnerJoinRollupExpression.
/**
 * Hash-joins Solr rating documents with a JDBC people/countries query on {@code personId},
 * rolls the result up over {@code country}, and checks max/min/avg/count of the ratings for
 * each country.
 *
 * @throws Exception if loading the database or Solr data, or evaluating the stream, fails
 */
@Test
public void testJDBCSolrInnerJoinRollupExpression() throws Exception {
  StreamFactory factory = new StreamFactory()
      .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
      .withFunctionName("search", CloudSolrStream.class)
      .withFunctionName("select", SelectStream.class)
      .withFunctionName("hashJoin", HashJoinStream.class)
      .withFunctionName("rollup", RollupStream.class)
      .withFunctionName("jdbc", JDBCStream.class)
      .withFunctionName("max", MaxMetric.class)
      .withFunctionName("min", MinMetric.class)
      .withFunctionName("avg", MeanMetric.class)
      .withFunctionName("count", CountMetric.class);

  // Load Database Data
  try (Connection connection = DriverManager.getConnection("jdbc:hsqldb:mem:.");
       Statement statement = connection.createStatement()) {
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('US', 'United States')");
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NL', 'Netherlands')");
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NP', 'Nepal')");
    statement.executeUpdate("insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NO', 'Norway')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (11,'Emma','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (12,'Grace','US')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (13,'Hailey','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (14,'Isabella','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (15,'Lily','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (16,'Madison','US')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (17,'Mia','US')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (18,'Natalie','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (19,'Olivia','NL')");
    statement.executeUpdate("insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (20,'Samantha','US')");
  }

  // Load solr data
  new UpdateRequest()
      .add(id, "1", "rating_f", "3.5", "personId_i", "11")
      .add(id, "3", "rating_f", "2.2", "personId_i", "13")
      .add(id, "4", "rating_f", "4.3", "personId_i", "14")
      .add(id, "5", "rating_f", "3.5", "personId_i", "15")
      .add(id, "8", "rating_f", "4", "personId_i", "18")
      .add(id, "9", "rating_f", "4.1", "personId_i", "19")
      .add(id, "2", "rating_f", "5", "personId_i", "12")
      .add(id, "6", "rating_f", "3", "personId_i", "16")
      .add(id, "7", "rating_f", "3", "personId_i", "17")
      .add(id, "10", "rating_f", "4.8", "personId_i", "20")
      .commit(cluster.getSolrClient(), COLLECTIONORALIAS);

  String expression;
  TupleStream stream;
  List<Tuple> tuples;
  StreamContext streamContext = new StreamContext();
  SolrClientCache solrClientCache = new SolrClientCache();
  streamContext.setSolrClientCache(solrClientCache);

  // Tolerance for the rating metrics: the average is a computed floating-point value, so an
  // exact == comparison would depend on summation order.
  final double ratingDelta = 1e-9;

  try {
    // Basic test
    expression = "rollup("
        + " hashJoin("
        + " hashed=select("
        + " search(" + COLLECTIONORALIAS + ", fl=\"personId_i,rating_f\", q=\"rating_f:*\", sort=\"personId_i asc\"),"
        + " personId_i as personId,"
        + " rating_f as rating"
        + " ),"
        + " select("
        + " jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by COUNTRIES.COUNTRY_NAME\", sort=\"COUNTRIES.COUNTRY_NAME asc\"),"
        + " ID as personId,"
        + " NAME as personName,"
        + " COUNTRY_NAME as country"
        + " ),"
        + " on=\"personId\""
        + " ),"
        + " over=\"country\","
        + " max(rating),"
        + " min(rating),"
        + " avg(rating),"
        + " count(*)"
        + ")";

    stream = factory.constructStream(expression);
    stream.setStreamContext(streamContext);
    tuples = getTuples(stream);

    // Only the two countries that have people (and therefore ratings) are expected.
    assertEquals(2, tuples.size());

    Tuple tuple = tuples.get(0);
    assertEquals("Netherlands", tuple.getString("country"));
    assertEquals(4.3D, tuple.getDouble("max(rating)"), ratingDelta);
    assertEquals(2.2D, tuple.getDouble("min(rating)"), ratingDelta);
    assertEquals(3.6D, tuple.getDouble("avg(rating)"), ratingDelta);
    assertEquals(6D, tuple.getDouble("count(*)"), 0D);

    tuple = tuples.get(1);
    assertEquals("United States", tuple.getString("country"));
    assertEquals(5D, tuple.getDouble("max(rating)"), ratingDelta);
    assertEquals(3D, tuple.getDouble("min(rating)"), ratingDelta);
    assertEquals(3.95D, tuple.getDouble("avg(rating)"), ratingDelta);
    assertEquals(4D, tuple.getDouble("count(*)"), 0D);
  } finally {
    solrClientCache.close();
  }
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamFactory in project lucene-solr by apache.
the class JDBCStreamTest method testJDBCSolrInnerJoinExpressionWithProperties.
/**
 * Inner-joins Solr rating documents with a JDBC people/countries query on {@code personId},
 * once selecting {@code PEOPLE.ID} as-is and once aliased to {@code PERSONID}, and checks that
 * both variants yield the same ten joined tuples in person-id order.
 *
 * @throws Exception if loading the database or Solr data, or evaluating a stream, fails
 */
@Test
public void testJDBCSolrInnerJoinExpressionWithProperties() throws Exception {
  StreamFactory streamFactory = new StreamFactory()
      .withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress())
      .withFunctionName("search", CloudSolrStream.class)
      .withFunctionName("select", SelectStream.class)
      .withFunctionName("innerJoin", InnerJoinStream.class)
      .withFunctionName("jdbc", JDBCStream.class);

  // Rows for the in-memory database, executed in this order.
  String[] seedStatements = {
      "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('US', 'United States')",
      "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NL', 'Netherlands')",
      "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NP', 'Nepal')",
      "insert into COUNTRIES (CODE,COUNTRY_NAME) values ('NO', 'Norway')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (11,'Emma','NL')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (12,'Grace','US')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (13,'Hailey','NL')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (14,'Isabella','NL')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (15,'Lily','NL')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (16,'Madison','US')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (17,'Mia','US')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (18,'Natalie','NL')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (19,'Olivia','NL')",
      "insert into PEOPLE (ID, NAME, COUNTRY_CODE) values (20,'Samantha','US')"
  };
  try (Connection db = DriverManager.getConnection("jdbc:hsqldb:mem:.");
       Statement insert = db.createStatement()) {
    for (String sql : seedStatements) {
      insert.executeUpdate(sql);
    }
  }

  // One rating document per person, linked through personId_i.
  UpdateRequest ratings = new UpdateRequest();
  ratings.add(id, "1", "rating_f", "3.5", "personId_i", "11");
  ratings.add(id, "2", "rating_f", "5", "personId_i", "12");
  ratings.add(id, "3", "rating_f", "2.2", "personId_i", "13");
  ratings.add(id, "4", "rating_f", "4.3", "personId_i", "14");
  ratings.add(id, "5", "rating_f", "3.5", "personId_i", "15");
  ratings.add(id, "6", "rating_f", "3", "personId_i", "16");
  ratings.add(id, "7", "rating_f", "3", "personId_i", "17");
  ratings.add(id, "8", "rating_f", "4", "personId_i", "18");
  ratings.add(id, "9", "rating_f", "4.1", "personId_i", "19");
  ratings.add(id, "10", "rating_f", "4.8", "personId_i", "20");
  ratings.commit(cluster.getSolrClient(), COLLECTIONORALIAS);

  // Left side of the join is identical for both variants.
  String solrSide = " select("
      + " search(" + COLLECTIONORALIAS + ", fl=\"personId_i,rating_f\", q=\"rating_f:*\", sort=\"personId_i asc\"),"
      + " personId_i as personId,"
      + " rating_f as rating"
      + " ),";

  // Right side, variant 1: no alias on PEOPLE.ID.
  String jdbcSideNoAlias = " select("
      + " jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by PEOPLE.ID\", sort=\"ID asc\"),"
      + " ID as personId,"
      + " NAME as personName,"
      + " COUNTRY_NAME as country"
      + " ),";

  // Right side, variant 2: PEOPLE.ID aliased to PERSONID.
  String jdbcSideAlias = " select("
      + " jdbc(connection=\"jdbc:hsqldb:mem:.\", sql=\"select PEOPLE.ID as PERSONID, PEOPLE.NAME, COUNTRIES.COUNTRY_NAME from PEOPLE inner join COUNTRIES on PEOPLE.COUNTRY_CODE = COUNTRIES.CODE order by PEOPLE.ID\", sort=\"PERSONID asc\"),"
      + " PERSONID as personId,"
      + " NAME as personName,"
      + " COUNTRY_NAME as country"
      + " ),";

  StreamContext sharedContext = new StreamContext();
  SolrClientCache clientCache = new SolrClientCache();
  sharedContext.setSolrClientCache(clientCache);

  try {
    // Basic test for no alias, then basic test for alias; expectations are the same for both.
    for (String jdbcSide : new String[] {jdbcSideNoAlias, jdbcSideAlias}) {
      String joinExpression = "innerJoin(" + solrSide + jdbcSide + " on=\"personId\"" + ")";

      TupleStream joined = streamFactory.constructStream(joinExpression);
      joined.setStreamContext(sharedContext);
      List<Tuple> rows = getTuples(joined);

      assertEquals(10, rows.size());
      assertOrderOf(rows, "personId", 11, 12, 13, 14, 15, 16, 17, 18, 19, 20);
      assertOrderOf(rows, "rating", 3.5d, 5d, 2.2d, 4.3d, 3.5d, 3d, 3d, 4d, 4.1d, 4.8d);
      assertOrderOf(rows, "personName", "Emma", "Grace", "Hailey", "Isabella", "Lily", "Madison", "Mia", "Natalie", "Olivia", "Samantha");
      assertOrderOf(rows, "country", "Netherlands", "United States", "Netherlands", "Netherlands", "Netherlands", "United States", "United States", "Netherlands", "Netherlands", "United States");
    }
  } finally {
    clientCache.close();
  }
}
Aggregations