use of org.apache.solr.client.solrj.io.stream.expr.StreamExpression in project lucene-solr by apache.
the class MergeStream method toExpression.
/**
 * Builds the streaming-expression representation of this stream.
 *
 * @param factory used to resolve the function name registered for this class and to render the
 *                nested streams and the comparator
 * @param includeStreams when {@code true} every underlying stream is rendered via its own
 *                       {@code toExpression}; when {@code false} the literal placeholder
 *                       {@code "<stream>"} is added in its place
 * @return an expression holding one parameter per underlying stream followed by the named
 *         {@code on} parameter
 * @throws IOException declared by the signature; NOTE(review): presumably raised by the nested
 *                     toExpression calls - confirm
 */
private StreamExpression toExpression(StreamFactory factory, boolean includeStreams) throws IOException {
  final String functionName = factory.getFunctionName(this.getClass());
  final StreamExpression result = new StreamExpression(functionName);

  // One parameter per underlying stream, in iteration order.
  for (PushBackStream source : streams) {
    if (!includeStreams) {
      result.addParameter("<stream>");
      continue;
    }
    result.addParameter(source.toExpression(factory));
  }

  // The comparator is appended last as the named "on" parameter.
  StreamExpressionNamedParameter onParameter = new StreamExpressionNamedParameter("on", comp.toExpression(factory));
  result.addParameter(onParameter);
  return result;
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamExpression in project lucene-solr by apache.
the class StreamExpressionTest method testTopicStream.
/**
 * Exercises the topic() expression against collection1, storing its checkpoints (id 1000000) in the
 * same collection as the documents.
 *
 * Steps: the first run must return no tuples; the checkpoint document must then exist; newly indexed
 * documents must be delivered by a second topic run with checkpointEvery=2, with the stored checkpoint
 * version only advancing after the second read; finally the same topic is wrapped in a daemon() and
 * read through a DaemonStream.
 *
 * The test is skipped when useAlias is set.
 */
@Test
public void testTopicStream() throws Exception {
  Assume.assumeTrue(!useAlias);
  new UpdateRequest().add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1").add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2").add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5").add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6").add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7").add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8").add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
  StreamFactory factory = new StreamFactory().withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()).withFunctionName("topic", TopicStream.class).withFunctionName("search", CloudSolrStream.class).withFunctionName("daemon", DaemonStream.class);
  StreamExpression expression;
  TupleStream stream;
  List<Tuple> tuples;
  SolrClientCache cache = new SolrClientCache();
  try {
    // Store checkpoints in the same index as the main documents. This is perfectly valid.
    expression = StreamExpressionParser.parse("topic(collection1, collection1, q=\"a_s:hello\", fl=\"id\", id=\"1000000\", checkpointEvery=3)");
    stream = factory.constructStream(expression);
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    // Should be zero because the checkpoints will be set to the highest version on the shards.
    assertEquals(0, tuples.size());
    cluster.getSolrClient().commit("collection1");

    // Now check to see if the checkpoints are present.
    Long version1 = readTopicCheckpointVersion(factory, cache);

    // Index a few more documents.
    new UpdateRequest().add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    expression = StreamExpressionParser.parse("topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2)");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    try {
      stream.open();
      Tuple tuple1 = stream.read();
      assertEquals(10L, (long) tuple1.getLong("id"));
      cluster.getSolrClient().commit("collection1");

      // Checkpoint should not have changed after a single read.
      Long version2 = readTopicCheckpointVersion(factory, cache);
      assertEquals(version1, version2);

      Tuple tuple2 = stream.read();
      cluster.getSolrClient().commit("collection1");
      assertEquals(11L, (long) tuple2.getLong("id"));

      // Checkpoint should have changed after the second read.
      Long version3 = readTopicCheckpointVersion(factory, cache);
      assertTrue(version3 > version2);

      Tuple tuple3 = stream.read();
      assertTrue(tuple3.EOF);
    } finally {
      stream.close();
    }

    // Test with the DaemonStream.
    DaemonStream dstream = null;
    try {
      expression = StreamExpressionParser.parse("daemon(topic(collection1, collection1, fl=\"id\", q=\"a_s:hello\", id=\"1000000\", checkpointEvery=2), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
      dstream = (DaemonStream) factory.constructStream(expression);
      context = new StreamContext();
      context.setSolrClientCache(cache);
      dstream.setStreamContext(context);

      // Index a few more documents.
      new UpdateRequest().add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);

      // Start reading from the DaemonStream.
      Tuple tuple = null;
      dstream.open();
      tuple = dstream.read();
      assertEquals(12, (long) tuple.getLong(id));
      tuple = dstream.read();
      assertEquals(13, (long) tuple.getLong(id));
      // We want to see if the version has been updated after reading two tuples.
      cluster.getSolrClient().commit("collection1");

      // Index a few more documents.
      new UpdateRequest().add(id, "14", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "15", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);

      // Read from the same DaemonStream.
      tuple = dstream.read();
      assertEquals(14, (long) tuple.getLong(id));
      // This should trigger a checkpoint as it's the 4th read from the stream.
      tuple = dstream.read();
      assertEquals(15, (long) tuple.getLong(id));
      dstream.shutdown();
      tuple = dstream.read();
      assertTrue(tuple.EOF);
    } finally {
      // dstream stays null when parsing or construction fails; guard so the original
      // failure is not replaced by a NullPointerException from this cleanup.
      if (dstream != null) {
        dstream.close();
      }
    }
  } finally {
    cache.close();
  }
}

/**
 * Searches collection1 for the checkpoint document with id 1000000, asserts that exactly one document
 * matches and that it carries two checkpoint_ss values, and returns that document's _version_.
 *
 * @param factory factory used to construct the search stream
 * @param cache client cache placed on the stream context of the search
 * @return the _version_ field of the checkpoint document
 */
private Long readTopicCheckpointVersion(StreamFactory factory, SolrClientCache cache) throws Exception {
  StreamExpression expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
  TupleStream checkpointStream = factory.constructStream(expression);
  StreamContext context = new StreamContext();
  context.setSolrClientCache(cache);
  checkpointStream.setStreamContext(context);
  List<Tuple> tuples = getTuples(checkpointStream);
  assertEquals(1, tuples.size());
  List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
  assertEquals(2, checkpoints.size());
  return tuples.get(0).getLong("_version_");
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamExpression in project lucene-solr by apache.
the class StreamExpressionTest method testDaemonStream.
/**
 * Runs a rollup(search(...)) wrapped in a daemon() and reads the repeated rollup results from the
 * DaemonStream queue. After the queue has filled up, a new document is indexed into the hello0 bucket
 * and the test verifies that later runs report the increased sum for that bucket.
 */
@Test
public void testDaemonStream() throws Exception {
  new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "1").add(id, "2", "a_s", "hello0", "a_i", "2", "a_f", "2").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello0", "a_i", "1", "a_f", "5").add(id, "5", "a_s", "hello3", "a_i", "10", "a_f", "6").add(id, "6", "a_s", "hello4", "a_i", "11", "a_f", "7").add(id, "7", "a_s", "hello3", "a_i", "12", "a_f", "8").add(id, "8", "a_s", "hello3", "a_i", "13", "a_f", "9").add(id, "9", "a_s", "hello0", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
  StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("rollup", RollupStream.class).withFunctionName("sum", SumMetric.class).withFunctionName("min", MinMetric.class).withFunctionName("max", MaxMetric.class).withFunctionName("avg", MeanMetric.class).withFunctionName("count", CountMetric.class).withFunctionName("daemon", DaemonStream.class);
  StreamExpression expression;
  DaemonStream daemonStream;
  expression = StreamExpressionParser.parse("daemon(rollup(" + "search(" + COLLECTIONORALIAS + ", q=\"*:*\", fl=\"a_i,a_s\", sort=\"a_s asc\")," + "over=\"a_s\"," + "sum(a_i)" + "), id=\"test\", runInterval=\"1000\", queueSize=\"9\")");
  daemonStream = (DaemonStream) factory.constructStream(expression);
  StreamContext streamContext = new StreamContext();
  SolrClientCache solrClientCache = new SolrClientCache();
  streamContext.setSolrClientCache(solrClientCache);
  daemonStream.setStreamContext(streamContext);
  try {
    // Test Long and Double Sums
    // This will start the daemon thread
    daemonStream.open();
    for (int i = 0; i < 4; i++) {
      assertDaemonRollupRun(daemonStream, 17.0D);
    }

    // Wait until the daemon's queue reports no remaining capacity. An interrupt is allowed to
    // propagate and fail the test rather than being swallowed inside the polling loop.
    while (daemonStream.remainingCapacity() > 0) {
      Thread.sleep(1000);
    }

    // OK capacity is full, let's index a new doc
    new UpdateRequest().add(id, "10", "a_s", "hello0", "a_i", "1", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    // Discard 12 tuples before checking again.
    // NOTE(review): presumably this drains the results that were queued (queueSize is 9) or in
    // flight before the new doc was indexed, so the next reads carry the updated sum - confirm.
    for (int i = 0; i < 12; i++) {
      daemonStream.read();
    }

    // And rerun the loop. It should have a new sum for hello0
    for (int i = 0; i < 4; i++) {
      assertDaemonRollupRun(daemonStream, 18.0D);
    }
  } finally {
    // This should stop the daemon thread
    daemonStream.close();
    solrClientCache.close();
  }
}

/**
 * Reads one rollup run (three tuples) from the daemon queue and asserts the buckets arrive as
 * hello0, hello3, hello4 with sums expectedHello0Sum, 38 and 15 respectively.
 *
 * @param daemonStream an already opened daemon stream to read from
 * @param expectedHello0Sum the sum(a_i) expected for the hello0 bucket
 */
private void assertDaemonRollupRun(DaemonStream daemonStream, double expectedHello0Sum) throws Exception {
  // Reads from the queue
  Tuple tuple = daemonStream.read();
  String bucket = tuple.getString("a_s");
  Double sumi = tuple.getDouble("sum(a_i)");
  assertEquals("hello0", bucket);
  assertEquals(expectedHello0Sum, sumi.doubleValue(), 0.0D);

  tuple = daemonStream.read();
  bucket = tuple.getString("a_s");
  sumi = tuple.getDouble("sum(a_i)");
  assertEquals("hello3", bucket);
  assertEquals(38.0D, sumi.doubleValue(), 0.0D);

  tuple = daemonStream.read();
  bucket = tuple.getString("a_s");
  sumi = tuple.getDouble("sum(a_i)");
  assertEquals("hello4", bucket);
  assertEquals(15L, sumi.longValue());
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamExpression in project lucene-solr by apache.
the class StreamExpressionTest method testParallelTopicStream.
/**
 * Exercises topic() wrapped in parallel() with two workers against collection1.
 *
 * Covers: an initial run that returns nothing but stores one checkpoint document per worker
 * (ids 1000000_0 and 1000000_1); delivery of newly indexed documents; a topic started with
 * initialCheckpoint=0 that returns all documents and then resumes where it left off; and a topic
 * that returns the subject field.
 *
 * The test is skipped when useAlias is set.
 */
@Test
public void testParallelTopicStream() throws Exception {
  Assume.assumeTrue(!useAlias);
  new UpdateRequest().add(id, "0", "a_s", "hello", "a_i", "0", "a_f", "1", "subject", "ha ha bla blah0").add(id, "2", "a_s", "hello", "a_i", "2", "a_f", "2", "subject", "ha ha bla blah2").add(id, "3", "a_s", "hello", "a_i", "3", "a_f", "3", "subject", "ha ha bla blah3").add(id, "4", "a_s", "hello", "a_i", "4", "a_f", "4", "subject", "ha ha bla blah4").add(id, "1", "a_s", "hello", "a_i", "1", "a_f", "5", "subject", "ha ha bla blah5").add(id, "5", "a_s", "hello", "a_i", "10", "a_f", "6", "subject", "ha ha bla blah6").add(id, "6", "a_s", "hello", "a_i", "11", "a_f", "7", "subject", "ha ha bla blah7").add(id, "7", "a_s", "hello", "a_i", "12", "a_f", "8", "subject", "ha ha bla blah8").add(id, "8", "a_s", "hello", "a_i", "13", "a_f", "9", "subject", "ha ha bla blah9").add(id, "9", "a_s", "hello", "a_i", "14", "a_f", "10", "subject", "ha ha bla blah10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
  StreamFactory factory = new StreamFactory().withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()).withFunctionName("topic", TopicStream.class).withFunctionName("search", CloudSolrStream.class).withFunctionName("parallel", ParallelStream.class).withFunctionName("daemon", DaemonStream.class);
  StreamExpression expression;
  TupleStream stream;
  List<Tuple> tuples;
  SolrClientCache cache = new SolrClientCache();
  try {
    // Store checkpoints in the same index as the main documents. This is perfectly valid.
    expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    StreamContext context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    // Should be zero because the checkpoints will be set to the highest version on the shards.
    assertEquals(0, tuples.size());
    cluster.getSolrClient().commit("collection1");

    // Now check to see if the checkpoints are present: one checkpoint document per worker.
    expression = StreamExpressionParser.parse("search(collection1, q=\"id:1000000*\", fl=\"id, checkpoint_ss, _version_\", sort=\"id asc\")");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assertEquals(2, tuples.size());
    List<String> checkpoints = tuples.get(0).getStrings("checkpoint_ss");
    assertEquals(2, checkpoints.size());
    String id1 = tuples.get(0).getString("id");
    String id2 = tuples.get(1).getString("id");
    assertEquals("1000000_0", id1);
    assertEquals("1000000_1", id2);

    // Index a few more documents.
    new UpdateRequest().add(id, "10", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "11", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
    expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"1000000\", " + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    assertTopicRun(stream, "10", "11");

    // Test with initial checkpoint. This should pull all documents.
    expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"a_s:hello\", " + "fl=\"id\", " + "id=\"2000000\", " + "initialCheckpoint=\"0\", " + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    assertTopicRun(stream, "0", "1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11");

    // Index a few more documents.
    new UpdateRequest().add(id, "12", "a_s", "hello", "a_i", "13", "a_f", "9").add(id, "13", "a_s", "hello", "a_i", "14", "a_f", "10").commit(cluster.getSolrClient(), COLLECTIONORALIAS);

    // Run the same topic again including the initialCheckpoint. It should start where it left off.
    // initialCheckpoint should be ignored for all but the first run.
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    assertTopicRun(stream, "12", "13");

    // Test text extraction.
    expression = StreamExpressionParser.parse("parallel(collection1, " + "workers=\"2\", " + "sort=\"_version_ asc\"," + "topic(collection1, " + "collection1, " + "q=\"subject:bla\", " + "fl=\"subject\", " + "id=\"3000000\", " + "initialCheckpoint=\"0\", " + "partitionKeys=\"id\"))");
    stream = factory.constructStream(expression);
    context = new StreamContext();
    context.setSolrClientCache(cache);
    stream.setStreamContext(context);
    // NOTE(review): "ha ha bla blah1" is not among the subjects indexed at the top of this test
    // (the indexed subjects are blah0 and blah2..blah10). Presumably assertTopicSubject only
    // checks that each returned subject is contained in this list - confirm.
    assertTopicSubject(stream, "ha ha bla blah0", "ha ha bla blah1", "ha ha bla blah2", "ha ha bla blah3", "ha ha bla blah4", "ha ha bla blah5", "ha ha bla blah6", "ha ha bla blah7", "ha ha bla blah8", "ha ha bla blah9", "ha ha bla blah10");
  } finally {
    cache.close();
  }
}
use of org.apache.solr.client.solrj.io.stream.expr.StreamExpression in project lucene-solr by apache.
the class StreamExpressionTest method testCloudSolrStream.
/**
 * Exercises CloudSolrStream built from search() expressions: a basic query, field aliases, a
 * filtered query, the error messages for a missing q, a missing fl and an invalid sort spec, a
 * search whose shards are supplied through the StreamContext, and finally the same search sent to
 * the /stream handler through a SolrStream with an explicit myCollection.shards parameter.
 */
@Test
public void testCloudSolrStream() throws Exception {
  new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0").add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
  StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress());
  StreamExpression expression;
  CloudSolrStream stream;
  List<Tuple> tuples;
  StreamContext streamContext = new StreamContext();
  SolrClientCache solrClientCache = new SolrClientCache();
  streamContext.setSolrClientCache(solrClientCache);
  try {
    // Basic test
    expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
    stream = new CloudSolrStream(expression, factory);
    stream.setStreamContext(streamContext);
    tuples = getTuples(stream);
    // JUnit assertions are used instead of the `assert` keyword so the checks run even without -ea.
    assertEquals(5, tuples.size());
    assertOrder(tuples, 0, 2, 1, 3, 4);
    assertLong(tuples.get(0), "a_i", 0);

    // Basic w/aliases
    expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\", aliases=\"a_i=alias.a_i, a_s=name\")");
    stream = new CloudSolrStream(expression, factory);
    stream.setStreamContext(streamContext);
    tuples = getTuples(stream);
    assertEquals(5, tuples.size());
    assertOrder(tuples, 0, 2, 1, 3, 4);
    assertLong(tuples.get(0), "alias.a_i", 0);
    assertString(tuples.get(0), "name", "hello0");

    // Basic filtered test
    expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"id:(0 3 4)\", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
    stream = new CloudSolrStream(expression, factory);
    stream.setStreamContext(streamContext);
    tuples = getTuples(stream);
    assertEquals(3, tuples.size());
    assertOrder(tuples, 0, 3, 4);
    assertLong(tuples.get(1), "a_i", 3);

    // Missing q parameter. If no exception is raised by the stream, the sentinel exception thrown
    // below is caught by the same handler and fails the message assertion.
    try {
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      throw new Exception("Should be an exception here");
    } catch (Exception e) {
      assertTrue(e.getMessage().contains("q param expected for search function"));
    }

    // Missing fl parameter.
    try {
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", sort=\"a_f asc, a_i asc\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      throw new Exception("Should be an exception here");
    } catch (Exception e) {
      assertTrue(e.getMessage().contains("fl param expected for search function"));
    }

    // Sort spec without a direction.
    try {
      expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=\"blah\", fl=\"id, a_f\", sort=\"a_f\")");
      stream = new CloudSolrStream(expression, factory);
      stream.setStreamContext(streamContext);
      tuples = getTuples(stream);
      throw new Exception("Should be an exception here");
    } catch (Exception e) {
      assertTrue(e.getMessage().contains("Invalid sort spec"));
    }

    // Test with shards param
    List<String> shardUrls = TupleStream.getShards(cluster.getZkServer().getZkAddress(), COLLECTIONORALIAS, streamContext);
    Map<String, List<String>> shardsMap = new HashMap<String, List<String>>();
    shardsMap.put("myCollection", shardUrls);
    StreamContext context = new StreamContext();
    context.put("shards", shardsMap);
    context.setSolrClientCache(solrClientCache);

    // Basic test
    expression = StreamExpressionParser.parse("search(myCollection, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
    stream = new CloudSolrStream(expression, factory);
    stream.setStreamContext(context);
    tuples = getTuples(stream);
    assertEquals(5, tuples.size());
    assertOrder(tuples, 0, 2, 1, 3, 4);
    assertLong(tuples.get(0), "a_i", 0);

    // Exercise the /stream handler
    // Add the shards http parameter for myCollection as a comma separated list of shard URLs.
    StringBuilder buf = new StringBuilder();
    for (String shardUrl : shardUrls) {
      if (buf.length() > 0) {
        buf.append(",");
      }
      buf.append(shardUrl);
    }
    ModifiableSolrParams solrParams = new ModifiableSolrParams();
    solrParams.add("qt", "/stream");
    solrParams.add("expr", "search(myCollection, q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc, a_i asc\")");
    solrParams.add("myCollection.shards", buf.toString());
    SolrStream solrStream = new SolrStream(shardUrls.get(0), solrParams);
    // Read through the SolrStream itself; re-running the CloudSolrStream here would leave the
    // /stream handler request untested.
    solrStream.setStreamContext(context);
    tuples = getTuples(solrStream);
    assertEquals(5, tuples.size());
    assertOrder(tuples, 0, 2, 1, 3, 4);
    assertLong(tuples.get(0), "a_i", 0);
  } finally {
    solrClientCache.close();
  }
}
Aggregations