use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class TopicClassificationEngine method updatePerformanceMetadata.
/**
 * Merge a new evaluation result into the stored metadata entry of a topic and send the rebuilt entry
 * back to the active Solr server. No commit is issued here: committing is the responsibility of the
 * caller.
 *
 * @param conceptId
 *            id of the topic; it is escaped and matched against {@code conceptUriField}
 * @param precision
 *            value passed to {@code addToList} for {@code precisionField}
 * @param recall
 *            value passed to {@code addToList} for {@code recallField}
 * @param positiveSupport
 *            value passed to {@code increment} for {@code positiveSupportField}
 * @param negativeSupport
 *            value passed to {@code increment} for {@code negativeSupportField}
 * @param falsePositiveExamples
 *            values passed to {@code addToList} for {@code falsePositivesField}
 * @param falseNegativeExamples
 *            values passed to {@code addToList} for {@code falseNegativesField}
 * @throws ClassifierException
 *             wrapping any exception raised while querying the index, rebuilding the entry, adding it
 *             or logging the result
 */
protected void updatePerformanceMetadata(String conceptId, float precision, float recall, int positiveSupport, int negativeSupport, List<String> falsePositiveExamples, List<String> falseNegativeExamples) throws ClassifierException {
    SolrServer server = getActiveSolrServer();
    try {
        // Select the metadata entries of this concept only.
        String entryTypeFilter = entryTypeField + ":" + METADATA_ENTRY;
        String conceptFilter = conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId);
        SolrQuery metadataQuery = new SolrQuery("*:*");
        metadataQuery.addFilterQuery(entryTypeFilter);
        metadataQuery.addFilterQuery(conceptFilter);

        for (SolrDocument storedEntry : server.query(metadataQuery).getResults()) {
            // A single match is expected (no match at all is tolerated). Start from the values
            // already in the index: all metadata fields are assumed to be stored.
            Map<String, Collection<Object>> values = new HashMap<String, Collection<Object>>();
            for (String name : storedEntry.getFieldNames()) {
                values.put(name, storedEntry.getFieldValues(name));
            }

            // Fold the new measurements into the previous values.
            addToList(values, precisionField, precision);
            addToList(values, recallField, recall);
            increment(values, positiveSupportField, positiveSupport);
            increment(values, negativeSupportField, negativeSupport);
            addToList(values, falsePositivesField, falsePositiveExamples);
            addToList(values, falseNegativesField, falseNegativeExamples);

            // Rebuild a complete input document from the merged values and stamp it.
            SolrInputDocument updatedEntry = new SolrInputDocument();
            for (Map.Entry<String, Collection<Object>> field : values.entrySet()) {
                updatedEntry.addField(field.getKey(), field.getValue());
            }
            updatedEntry.setField(modelEvaluationDateField, UTCTimeStamper.nowUtcDate());
            server.add(updatedEntry);
        }

        String summary = String.format("Performance for concept '%s': precision=%f, recall=%f," + " positiveSupport=%d, negativeSupport=%d", conceptId, precision, recall, positiveSupport, negativeSupport);
        log.info(summary);
    } catch (Exception e) {
        String msg = String.format("Error updating performance metadata for topic '%s' on Solr Core '%s'", conceptId, solrCoreId);
        throw new ClassifierException(msg, e);
    }
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class TopicClassificationEngine method updatePerformanceEstimates.
/**
 * Run the cross-validation folds over the training set and optimize the active Solr server
 * afterwards.
 *
 * @param incremental
 *            forwarded unchanged to each {@code performCVFold} call
 * @return the value returned by the last {@code performCVFold} call
 * @throws ClassifierException
 *             if another evaluation is already flagged as running, or wrapping a
 *             {@code ConfigurationException}, {@code IOException} or {@code SolrServerException}
 *             raised during the evaluation
 * @throws TrainingSetException
 *             declared by this method and not caught in its body, so one raised by the calls made
 *             here propagates to the caller
 */
public synchronized int updatePerformanceEstimates(boolean incremental) throws ClassifierException, TrainingSetException {
    checkTrainingSet();
    if (evaluationRunning) {
        throw new ClassifierException("Another evaluation is already running");
    }

    // 3-folds CV is hardcoded for now
    final int foldCount = 3;
    // make it possible to limit the number of folds to use
    final int iterationCount = 3;

    int updatedTopics = 0;
    try {
        evaluationRunning = true;

        // The training set is about to be queried intensively: make sure the index is packed and
        // its statistics are up to date before starting.
        getTrainingSet().optimize();

        int foldIndex = 0;
        while (foldIndex < iterationCount) {
            // each fold overwrites the previous count: only the last result is returned
            updatedTopics = performCVFold(foldIndex, foldCount, iterationCount, incremental);
            foldIndex++;
        }

        getActiveSolrServer().optimize();
    } catch (ConfigurationException e) {
        throw new ClassifierException(e);
    } catch (IOException e) {
        throw new ClassifierException(e);
    } catch (SolrServerException e) {
        throw new ClassifierException(e);
    } finally {
        // Whatever happened, remove the evaluation server directory and release the flag so that
        // a later evaluation can start.
        FileUtils.deleteQuietly(__evaluationServerDir);
        evaluationRunning = false;
    }
    return updatedTopics;
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class SolrTrainingSet method getExamples.
/**
 * Fetch one batch of examples from the active Solr server, sorted by ascending example id.
 * <p>
 * The query selects every example when {@code topics} is empty, the examples carrying at least one
 * of the topics when {@code positive} is true, and the examples carrying none of them otherwise.
 * One more row than {@code batchSize} is requested: if that extra row comes back, its id is handed
 * to the returned batch as the offset of the next batch.
 *
 * @param topics
 *            topic ids to filter on; must not be null, may be empty
 * @param offset
 *            lower bound (inclusive) on the example id, or null to start from the beginning; its
 *            {@code toString()} value is escaped before being placed in the range clause
 * @param positive
 *            true to select examples matching any topic, false to select examples matching none
 * @return the batch built from the collected examples, a flag telling whether a further example id
 *         was seen, and that id (null when there is none)
 * @throws TrainingSetException
 *             wrapping a {@code SolrServerException} raised by the query
 */
protected Batch<Example> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
    List<Example> items = new ArrayList<Example>();
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    List<String> parts = new ArrayList<String>();
    StringBuilder q = new StringBuilder();
    if (topics.isEmpty()) {
        q.append("*:*");
    } else if (positive) {
        for (String topic : topics) {
            parts.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        String anyTopic = StringUtils.join(parts, " OR ");
        if (offset != null) {
            // group the OR clauses so that the range clause appended below applies to all of them
            q.append("(").append(anyTopic).append(")");
        } else {
            q.append(anyTopic);
        }
    } else {
        for (String topic : topics) {
            parts.add("-" + topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        q.append(StringUtils.join(parts, " AND "));
    }
    if (offset != null) {
        // Escape the offset like the topic ids above: an id containing query syntax characters
        // must be read as a literal term, not as part of the query.
        q.append(" AND ").append(exampleIdField).append(":[").append(ClientUtils.escapeQueryChars(offset.toString())).append(" TO *]");
    }
    query.setQuery(q.toString());
    query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
    // ask for one extra row to detect whether another batch follows
    query.set("rows", batchSize + 1);
    String nextExampleId = null;
    try {
        int count = 0;
        QueryResponse response = solrServer.query(query);
        for (SolrDocument result : response.getResults()) {
            if (count == batchSize) {
                // the extra row: remember its id as the starting point of the next batch
                nextExampleId = result.getFirstValue(exampleIdField).toString();
            } else {
                count++;
                String exampleId = result.getFirstValue(exampleIdField).toString();
                Collection<Object> labelValues = result.getFieldValues(topicUrisField);
                Collection<Object> textValues = result.getFieldValues(exampleTextField);
                if (textValues == null) {
                    // no text to learn from: the row is counted but not returned
                    continue;
                }
                items.add(new Example(exampleId, labelValues, textValues));
            }
        }
    } catch (SolrServerException e) {
        String msg = String.format("Error while fetching positive examples for topics ['%s'] on Solr Core '%s'.", StringUtils.join(topics, "', '"), solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return new Batch<Example>(items, nextExampleId != null, nextExampleId);
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class SolrTrainingSet method registerExample.
/**
 * Register, replace or remove an example in the training set index, committing the change.
 * <p>
 * A null {@code text} requests the removal of the example identified by {@code exampleId}.
 * Otherwise a document holding the id, the text, the topics (when not null) and a modification
 * timestamp is added; a random UUID is generated when {@code exampleId} is null or empty.
 *
 * @param exampleId
 *            id of the example; required when {@code text} is null
 * @param text
 *            text of the example, or null to remove the example
 * @param topics
 *            topic ids of the example, may be null
 * @return the id of the example that was removed or registered
 * @throws IllegalArgumentException
 *             if both {@code exampleId} and {@code text} are null
 * @throws TrainingSetException
 *             wrapping any exception raised while updating the index
 */
@Override
public String registerExample(String exampleId, String text, List<String> topics) throws TrainingSetException {
    if (text == null) {
        // special case: example removal
        if (exampleId == null) {
            throw new IllegalArgumentException("exampleId and text should not be null simultaneously");
        }
        SolrServer solrServer = getActiveSolrServer();
        try {
            // The id is escaped so that it is matched as a literal term: left as-is, an id holding
            // query syntax (e.g. ':' in a URI) would not parse, and "*" would delete every example.
            solrServer.deleteByQuery(exampleIdField + ":" + ClientUtils.escapeQueryChars(exampleId));
            solrServer.commit();
            return exampleId;
        } catch (Exception e) {
            String msg = String.format("Error deleting example with id '%s' on Solr Core '%s'", exampleId, solrCoreId);
            throw new TrainingSetException(msg, e);
        }
    }
    if (exampleId == null || exampleId.isEmpty()) {
        exampleId = UUID.randomUUID().toString();
    }
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField(exampleIdField, exampleId);
    doc.addField(exampleTextField, text);
    if (topics != null) {
        doc.addField(topicUrisField, topics);
    }
    doc.addField(modificationDateField, UTCTimeStamper.nowUtcDate());
    SolrServer server = getActiveSolrServer();
    try {
        server.add(doc);
        server.commit();
    } catch (Exception e) {
        String msg = String.format("Could not register example '%s' with topics: ['%s']", exampleId, StringUtils.join(topics, "', '"));
        throw new TrainingSetException(msg, e);
    }
    return exampleId;
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class SolrTrainingSet method hasChangedSince.
/**
 * Tell whether the index holds at least one example whose modification date field is at or after
 * the reference date, optionally restricted to examples carrying one of the given topics.
 *
 * @param topics
 *            topic ids to restrict the check to; null or empty means no topic restriction
 * @param referenceDate
 *            lower bound (inclusive) of the modification date range
 * @return true if the query returns at least one document
 * @throws TrainingSetException
 *             wrapping a {@code SolrServerException} raised by the query
 */
@Override
public boolean hasChangedSince(List<String> topics, Date referenceDate) throws TrainingSetException {
    String utcIsoDate = UTCTimeStamper.utcIsoString(referenceDate);

    // date range clause: everything modified from the reference date onwards
    String queryText = modificationDateField + ":[" + utcIsoDate + " TO *]";

    boolean restrictToTopics = topics != null && !topics.isEmpty();
    if (restrictToTopics) {
        List<String> topicClauses = new ArrayList<String>();
        for (String topic : topics) {
            // topic ids are escaped so that special solr chars are matched literally
            topicClauses.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        queryText = queryText + " AND (" + StringUtils.join(topicClauses, " OR ") + ")";
    }

    // a single row with only the id field is enough to know whether anything matched
    SolrQuery query = new SolrQuery(queryText);
    query.setRows(1);
    query.setFields(exampleIdField);
    try {
        SolrServer solrServer = getActiveSolrServer();
        return !solrServer.query(query).getResults().isEmpty();
    } catch (SolrServerException e) {
        String msg = String.format("Error while fetching topics for examples modified after '%s' on Solr Core '%s'.", utcIsoDate, solrCoreId);
        throw new TrainingSetException(msg, e);
    }
}
Aggregations