use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.
the class StreamExpressionTest method testNulls.
@Test
public void testNulls() throws Exception {
new UpdateRequest().add(id, "0", "a_i", "1", "a_f", "0", "s_multi", "aaa", "s_multi", "bbb", "i_multi", "100", "i_multi", "200").add(id, "2", "a_s", "hello2", "a_i", "3", "a_f", "0").add(id, "3", "a_s", "hello3", "a_i", "4", "a_f", "3").add(id, "4", "a_s", "hello4", "a_f", "4").add(id, "1", "a_s", "hello1", "a_i", "2", "a_f", "1").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
StreamExpression expression;
TupleStream stream;
List<Tuple> tuples;
Tuple tuple;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class);
try {
// Basic test
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f, s_multi, i_multi\", qt=\"/export\", sort=\"a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 5);
assertOrder(tuples, 4, 0, 1, 2, 3);
tuple = tuples.get(0);
assertTrue("hello4".equals(tuple.getString("a_s")));
assertNull(tuple.get("s_multi"));
assertNull(tuple.get("i_multi"));
assertNull(tuple.getLong("a_i"));
tuple = tuples.get(1);
assertNull(tuple.get("a_s"));
List<String> strings = tuple.getStrings("s_multi");
assertNotNull(strings);
assertEquals("aaa", strings.get(0));
assertEquals("bbb", strings.get(1));
List<Long> longs = tuple.getLongs("i_multi");
assertNotNull(longs);
//test sort (asc) with null string field. Null should sort to the top.
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f, s_multi, i_multi\", qt=\"/export\", sort=\"a_s asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 5);
assertOrder(tuples, 0, 1, 2, 3, 4);
//test sort(desc) with null string field. Null should sort to the bottom.
expression = StreamExpressionParser.parse("search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f, s_multi, i_multi\", qt=\"/export\", sort=\"a_s desc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 5);
assertOrder(tuples, 4, 3, 2, 1, 0);
} finally {
solrClientCache.close();
}
}
use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.
the class StreamExpressionTest method testBasicTextLogitStream.
@Test
public void testBasicTextLogitStream() throws Exception {
Assume.assumeTrue(!useAlias);
CollectionAdminRequest.createCollection("destinationCollection", "ml", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish("destinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
UpdateRequest updateRequest = new UpdateRequest();
for (int i = 0; i < 5000; i += 2) {
updateRequest.add(id, String.valueOf(i), "tv_text", "a b c c d", "out_i", "1");
updateRequest.add(id, String.valueOf(i + 1), "tv_text", "a b e e f", "out_i", "0");
}
updateRequest.commit(cluster.getSolrClient(), COLLECTIONORALIAS);
StreamExpression expression;
TupleStream stream;
List<Tuple> tuples;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
StreamFactory factory = new StreamFactory().withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()).withCollectionZkHost("destinationCollection", cluster.getZkServer().getZkAddress()).withFunctionName("features", FeaturesSelectionStream.class).withFunctionName("train", TextLogitStream.class).withFunctionName("search", CloudSolrStream.class).withFunctionName("update", UpdateStream.class);
try {
expression = StreamExpressionParser.parse("features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4)");
stream = new FeaturesSelectionStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assert (tuples.size() == 4);
HashSet<String> terms = new HashSet<>();
for (Tuple tuple : tuples) {
terms.add((String) tuple.get("term_s"));
}
assertTrue(terms.contains("d"));
assertTrue(terms.contains("c"));
assertTrue(terms.contains("e"));
assertTrue(terms.contains("f"));
String textLogitExpression = "train(" + "collection1, " + "features(collection1, q=\"*:*\", featureSet=\"first\", field=\"tv_text\", outcome=\"out_i\", numTerms=4)," + "q=\"*:*\", " + "name=\"model\", " + "field=\"tv_text\", " + "outcome=\"out_i\", " + "maxIterations=100)";
stream = factory.constructStream(textLogitExpression);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
Tuple lastTuple = tuples.get(tuples.size() - 1);
List<Double> lastWeights = lastTuple.getDoubles("weights_ds");
Double[] lastWeightsArray = lastWeights.toArray(new Double[lastWeights.size()]);
// first feature is bias value
Double[] testRecord = { 1.0, 1.17, 0.691, 0.0, 0.0 };
double d = sum(multiply(testRecord, lastWeightsArray));
double prob = sigmoid(d);
assertEquals(prob, 1.0, 0.1);
// first feature is bias value
Double[] testRecord2 = { 1.0, 0.0, 0.0, 1.17, 0.691 };
d = sum(multiply(testRecord2, lastWeightsArray));
prob = sigmoid(d);
assertEquals(prob, 0, 0.1);
stream = factory.constructStream("update(destinationCollection, batchSize=5, " + textLogitExpression + ")");
getTuples(stream);
cluster.getSolrClient().commit("destinationCollection");
stream = factory.constructStream("search(destinationCollection, " + "q=*:*, " + "fl=\"iteration_i,* \", " + "rows=100, " + "sort=\"iteration_i desc\")");
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assertEquals(100, tuples.size());
Tuple lastModel = tuples.get(0);
ClassificationEvaluation evaluation = ClassificationEvaluation.create(lastModel.fields);
assertTrue(evaluation.getF1() >= 1.0);
assertEquals(Math.log(5000.0 / (2500 + 1)), lastModel.getDoubles("idfs_ds").get(0), 0.0001);
// make sure the tuples is retrieved in correct order
Tuple firstTuple = tuples.get(99);
assertEquals(1L, (long) firstTuple.getLong("iteration_i"));
} finally {
CollectionAdminRequest.deleteCollection("destinationCollection").process(cluster.getSolrClient());
solrClientCache.close();
}
}
use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.
the class StreamExpressionTest method testUpdateStream.
@Test
public void testUpdateStream() throws Exception {
CollectionAdminRequest.createCollection("destinationCollection", "conf", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish("destinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7").add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777").add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777").commit(cluster.getSolrClient(), "collection1");
StreamExpression expression;
TupleStream stream;
Tuple t;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
StreamFactory factory = new StreamFactory().withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()).withCollectionZkHost("destinationCollection", cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("update", UpdateStream.class);
try {
//Copy all docs to destinationCollection
expression = StreamExpressionParser.parse("update(destinationCollection, batchSize=5, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\"))");
stream = new UpdateStream(expression, factory);
stream.setStreamContext(streamContext);
List<Tuple> tuples = getTuples(stream);
cluster.getSolrClient().commit("destinationCollection");
//Ensure that all UpdateStream tuples indicate the correct number of copied/indexed docs
assert (tuples.size() == 1);
t = tuples.get(0);
assert (t.EOF == false);
assertEquals(5, t.get("batchIndexed"));
//Ensure that destinationCollection actually has the new docs.
expression = StreamExpressionParser.parse("search(destinationCollection, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assertEquals(5, tuples.size());
Tuple tuple = tuples.get(0);
assert (tuple.getLong("id") == 0);
assert (tuple.get("a_s").equals("hello0"));
assert (tuple.getLong("a_i") == 0);
assert (tuple.getDouble("a_f") == 0.0);
assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb");
assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7"));
tuple = tuples.get(1);
assert (tuple.getLong("id") == 1);
assert (tuple.get("a_s").equals("hello1"));
assert (tuple.getLong("a_i") == 1);
assert (tuple.getDouble("a_f") == 1.0);
assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4");
assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777"));
tuple = tuples.get(2);
assert (tuple.getLong("id") == 2);
assert (tuple.get("a_s").equals("hello2"));
assert (tuple.getLong("a_i") == 2);
assert (tuple.getDouble("a_f") == 0.0);
assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1");
assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77"));
tuple = tuples.get(3);
assert (tuple.getLong("id") == 3);
assert (tuple.get("a_s").equals("hello3"));
assert (tuple.getLong("a_i") == 3);
assert (tuple.getDouble("a_f") == 3.0);
assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2");
assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777"));
tuple = tuples.get(4);
assert (tuple.getLong("id") == 4);
assert (tuple.get("a_s").equals("hello4"));
assert (tuple.getLong("a_i") == 4);
assert (tuple.getDouble("a_f") == 4.0);
assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3");
assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777"));
} finally {
CollectionAdminRequest.deleteCollection("destinationCollection").process(cluster.getSolrClient());
solrClientCache.close();
}
}
use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.
the class StreamExpressionTest method testNullStream.
@Test
public void testNullStream() throws Exception {
new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0").add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4").add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1").add(id, "5", "a_s", "hello1", "a_i", "1", "a_f", "2").commit(cluster.getSolrClient(), COLLECTIONORALIAS);
StreamExpression expression;
TupleStream stream;
List<Tuple> tuples;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
StreamFactory factory = new StreamFactory().withCollectionZkHost(COLLECTIONORALIAS, cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("null", NullStream.class);
try {
// Basic test
stream = factory.constructStream("null(search(" + COLLECTIONORALIAS + ", q=*:*, fl=\"id,a_s,a_i,a_f\", sort=\"a_f asc\"), by=\"a_i asc\")");
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assertTrue(tuples.size() == 1);
assertTrue(tuples.get(0).getLong("nullCount") == 6);
} finally {
solrClientCache.close();
}
}
use of org.apache.solr.client.solrj.io.SolrClientCache in project lucene-solr by apache.
the class StreamExpressionTest method testParallelUpdateStream.
@Test
public void testParallelUpdateStream() throws Exception {
CollectionAdminRequest.createCollection("parallelDestinationCollection", "conf", 2, 1).process(cluster.getSolrClient());
AbstractDistribZkTestBase.waitForRecoveriesToFinish("parallelDestinationCollection", cluster.getSolrClient().getZkStateReader(), false, true, TIMEOUT);
new UpdateRequest().add(id, "0", "a_s", "hello0", "a_i", "0", "a_f", "0", "s_multi", "aaaa", "s_multi", "bbbb", "i_multi", "4", "i_multi", "7").add(id, "2", "a_s", "hello2", "a_i", "2", "a_f", "0", "s_multi", "aaaa1", "s_multi", "bbbb1", "i_multi", "44", "i_multi", "77").add(id, "3", "a_s", "hello3", "a_i", "3", "a_f", "3", "s_multi", "aaaa2", "s_multi", "bbbb2", "i_multi", "444", "i_multi", "777").add(id, "4", "a_s", "hello4", "a_i", "4", "a_f", "4", "s_multi", "aaaa3", "s_multi", "bbbb3", "i_multi", "4444", "i_multi", "7777").add(id, "1", "a_s", "hello1", "a_i", "1", "a_f", "1", "s_multi", "aaaa4", "s_multi", "bbbb4", "i_multi", "44444", "i_multi", "77777").commit(cluster.getSolrClient(), "collection1");
StreamExpression expression;
TupleStream stream;
Tuple t;
StreamContext streamContext = new StreamContext();
SolrClientCache solrClientCache = new SolrClientCache();
streamContext.setSolrClientCache(solrClientCache);
String zkHost = cluster.getZkServer().getZkAddress();
StreamFactory factory = new StreamFactory().withCollectionZkHost("collection1", cluster.getZkServer().getZkAddress()).withCollectionZkHost("parallelDestinationCollection", cluster.getZkServer().getZkAddress()).withFunctionName("search", CloudSolrStream.class).withFunctionName("update", UpdateStream.class).withFunctionName("parallel", ParallelStream.class);
try {
//Copy all docs to destinationCollection
String updateExpression = "update(parallelDestinationCollection, batchSize=2, search(collection1, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_f asc, a_i asc\", partitionKeys=\"a_f\"))";
TupleStream parallelUpdateStream = factory.constructStream("parallel(collection1, " + updateExpression + ", workers=\"2\", zkHost=\"" + zkHost + "\", sort=\"batchNumber asc\")");
parallelUpdateStream.setStreamContext(streamContext);
List<Tuple> tuples = getTuples(parallelUpdateStream);
cluster.getSolrClient().commit("parallelDestinationCollection");
//Ensure that all UpdateStream tuples indicate the correct number of copied/indexed docs
long count = 0;
for (Tuple tuple : tuples) {
count += tuple.getLong("batchIndexed");
}
assert (count == 5);
//Ensure that destinationCollection actually has the new docs.
expression = StreamExpressionParser.parse("search(parallelDestinationCollection, q=*:*, fl=\"id,a_s,a_i,a_f,s_multi,i_multi\", sort=\"a_i asc\")");
stream = new CloudSolrStream(expression, factory);
stream.setStreamContext(streamContext);
tuples = getTuples(stream);
assertEquals(5, tuples.size());
Tuple tuple = tuples.get(0);
assert (tuple.getLong("id") == 0);
assert (tuple.get("a_s").equals("hello0"));
assert (tuple.getLong("a_i") == 0);
assert (tuple.getDouble("a_f") == 0.0);
assertList(tuple.getStrings("s_multi"), "aaaa", "bbbb");
assertList(tuple.getLongs("i_multi"), Long.parseLong("4"), Long.parseLong("7"));
tuple = tuples.get(1);
assert (tuple.getLong("id") == 1);
assert (tuple.get("a_s").equals("hello1"));
assert (tuple.getLong("a_i") == 1);
assert (tuple.getDouble("a_f") == 1.0);
assertList(tuple.getStrings("s_multi"), "aaaa4", "bbbb4");
assertList(tuple.getLongs("i_multi"), Long.parseLong("44444"), Long.parseLong("77777"));
tuple = tuples.get(2);
assert (tuple.getLong("id") == 2);
assert (tuple.get("a_s").equals("hello2"));
assert (tuple.getLong("a_i") == 2);
assert (tuple.getDouble("a_f") == 0.0);
assertList(tuple.getStrings("s_multi"), "aaaa1", "bbbb1");
assertList(tuple.getLongs("i_multi"), Long.parseLong("44"), Long.parseLong("77"));
tuple = tuples.get(3);
assert (tuple.getLong("id") == 3);
assert (tuple.get("a_s").equals("hello3"));
assert (tuple.getLong("a_i") == 3);
assert (tuple.getDouble("a_f") == 3.0);
assertList(tuple.getStrings("s_multi"), "aaaa2", "bbbb2");
assertList(tuple.getLongs("i_multi"), Long.parseLong("444"), Long.parseLong("777"));
tuple = tuples.get(4);
assert (tuple.getLong("id") == 4);
assert (tuple.get("a_s").equals("hello4"));
assert (tuple.getLong("a_i") == 4);
assert (tuple.getDouble("a_f") == 4.0);
assertList(tuple.getStrings("s_multi"), "aaaa3", "bbbb3");
assertList(tuple.getLongs("i_multi"), Long.parseLong("4444"), Long.parseLong("7777"));
} finally {
CollectionAdminRequest.deleteCollection("parallelDestinationCollection").process(cluster.getSolrClient());
solrClientCache.close();
}
}
Aggregations