Use of org.apache.solr.client.solrj.SolrServerException in project stanbol by apache.
The class SolrYard, method update.
/**
 * Sends updated Solr documents for those parsed Representations whose id is
 * reported back by {@code checkRepresentations} (the ids already found in the
 * Solr index). <code>null</code> elements and Representations whose id is not
 * reported are skipped.
 * <p>
 * The parsed Iterable is traversed twice: once to collect the ids and once to
 * build the documents. If this yard is flagged as closed only a warning is
 * logged; processing continues.
 *
 * @param representations the Representations to update; MUST NOT be <code>null</code>
 * @return the Representations for which a document was added to the update request
 * @throws YardException if checking the index or processing the update fails
 *         with a SolrServerException or an IOException
 * @throws IllegalArgumentException if the parsed Iterable is <code>null</code>
 */
@Override
public final Iterable<Representation> update(Iterable<Representation> representations) throws YardException, IllegalArgumentException, NullPointerException {
    if (representations == null) {
        throw new IllegalArgumentException("The parsed Iterable over Representations MUST NOT be NULL!");
    }
    final long startTime = System.currentTimeMillis();
    // first pass: collect the ids of all non-null Representations
    Set<String> requestedIds = new HashSet<String>();
    for (Representation candidate : representations) {
        if (candidate == null) {
            continue;
        }
        requestedIds.add(candidate.getId());
    }
    if (closed) {
        log.warn("The SolrYard '{}' was already closed!", config.getName());
    }
    // only needed for the timing/debug message at the end
    final int requestedCount = requestedIds.size();
    final Set<String> indexedIds;
    try {
        // narrows the requested ids down to those found in the Solr index
        indexedIds = checkRepresentations(requestedIds);
    } catch (SolrServerException e) {
        throw new YardException("Error while searching for alredy present documents " + "before executing the actual update for the parsed Representations", e);
    } catch (IOException e) {
        throw new YardException("Unable to access SolrServer", e);
    }
    final long checkedTime = System.currentTimeMillis();
    // second pass: build documents for everything that is already indexed
    List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>(indexedIds.size());
    List<Representation> updatedRepresentations = new ArrayList<Representation>();
    for (Representation candidate : representations) {
        if (candidate == null || !indexedIds.contains(candidate.getId())) {
            // null parsed or not already present
            continue;
        }
        documents.add(createSolrInputDocument(candidate));
        updatedRepresentations.add(candidate);
    }
    final long createdTime = System.currentTimeMillis();
    if (!documents.isEmpty()) {
        try {
            final UpdateRequest request = new UpdateRequest();
            if (!immediateCommit) {
                request.setCommitWithin(commitWithin);
            }
            request.add(documents);
            AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {
                public UpdateResponse run() throws IOException, SolrServerException {
                    request.process(server);
                    if (immediateCommit) {
                        server.commit();
                    }
                    return null;
                }
            });
        } catch (PrivilegedActionException pae) {
            final Exception wrapped = pae.getException();
            if (wrapped instanceof SolrServerException) {
                throw new YardException("Error while adding updated Documents to the SolrServer", wrapped);
            }
            if (wrapped instanceof IOException) {
                throw new YardException("Unable to access SolrServer", wrapped);
            }
            throw RuntimeException.class.cast(wrapped);
        }
    }
    final long readyTime = System.currentTimeMillis();
    log.info(String.format("Processed updateRequest for %d documents (%d in index " + "| %d updated) in %dms (checked %dms|created %dms| stored%dms)", requestedCount, indexedIds.size(), updatedRepresentations.size(), readyTime - startTime, checkedTime - startTime, createdTime - checkedTime, readyTime - createdTime));
    return updatedRepresentations;
}
Use of org.apache.solr.client.solrj.SolrServerException in project stanbol by apache.
The class TopicClassificationEngine, method updatePerformanceEstimates.
/**
 * Runs the cross-validation folds via {@code performCVFold} and afterwards
 * optimizes the active Solr server. Only one evaluation may run at a time;
 * the {@code evaluationRunning} flag is reset and the evaluation server
 * directory is deleted in all cases when the method exits.
 *
 * @param incremental passed through unchanged to {@code performCVFold}
 * @return the value returned by the last executed fold (earlier folds'
 *         results are overwritten, not summed)
 * @throws ClassifierException if another evaluation is already running or if a
 *         ConfigurationException, IOException or SolrServerException occurs
 * @throws TrainingSetException declared for the training set operations used here
 */
public synchronized int updatePerformanceEstimates(boolean incremental) throws ClassifierException, TrainingSetException {
    checkTrainingSet();
    if (evaluationRunning) {
        throw new ClassifierException("Another evaluation is already running");
    }
    // 3-folds CV is hardcoded for now
    final int foldCount = 3;
    // make it possible to limit the number of folds to use
    final int iterationCount = 3;
    int lastFoldUpdates = 0;
    try {
        evaluationRunning = true;
        // The training set is used intensively below: ensure the index is packed
        // and its statistics are up to date
        getTrainingSet().optimize();
        int fold = 0;
        while (fold < iterationCount) {
            lastFoldUpdates = performCVFold(fold, foldCount, iterationCount, incremental);
            fold++;
        }
        getActiveSolrServer().optimize();
    } catch (ConfigurationException e) {
        throw new ClassifierException(e);
    } catch (IOException e) {
        throw new ClassifierException(e);
    } catch (SolrServerException e) {
        throw new ClassifierException(e);
    } finally {
        FileUtils.deleteQuietly(__evaluationServerDir);
        evaluationRunning = false;
    }
    return lastFoldUpdates;
}
Use of org.apache.solr.client.solrj.SolrServerException in project stanbol by apache.
The class TopicClassificationEngine, method suggestTopics.
/**
 * Suggests topics for the parsed text by sending it as stream body to the
 * MoreLikeThis request handler of the active Solr server, restricted to model
 * entries. For every hit the matching metadata entry is looked up to obtain
 * the primary topic and the broader concepts.
 * <p>
 * The query over-fetches (<code>MAX_SUGGESTIONS * 3</code> rows). If more than
 * one suggestion was found, the list is cut at the first suggestion whose
 * score is not above <code>0.25 * topScore + 0.75 * meanScore</code>; the top
 * suggestion is always kept and at most <code>MAX_SUGGESTIONS</code> entries
 * are returned.
 *
 * @param text the text to classify
 * @return the suggested topics in the order returned by Solr
 * @throws ClassifierException if the Solr request fails, if the
 *         MoreLikeThis handler is not configured, if a result lacks the
 *         concept URI field or if no metadata entry exists for a concept
 */
public List<TopicSuggestion> suggestTopics(String text) throws ClassifierException {
    List<TopicSuggestion> suggestedTopics = new ArrayList<TopicSuggestion>(MAX_SUGGESTIONS * 3);
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    query.setRequestHandler("/" + MoreLikeThisParams.MLT);
    query.setFilterQueries(entryTypeField + ":" + MODEL_ENTRY);
    query.set(MoreLikeThisParams.MATCH_INCLUDE, false);
    query.set(MoreLikeThisParams.MIN_DOC_FREQ, 1);
    query.set(MoreLikeThisParams.MIN_TERM_FREQ, 1);
    query.set(MoreLikeThisParams.MAX_QUERY_TERMS, 30);
    query.set(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, 10000);
    // TODO: find a way to parse the interesting terms and report them
    // for debugging / explanation in dedicated RDF data structure.
    // query.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
    query.set(MoreLikeThisParams.SIMILARITY_FIELDS, similarityField);
    query.set(CommonParams.STREAM_BODY, text);
    // over query the number of suggestions to find a statistical cut based on the curve of the scores of
    // the top suggestion
    query.setRows(MAX_SUGGESTIONS * 3);
    query.setFields(conceptUriField);
    query.setIncludeScore(true);
    try {
        StreamQueryRequest request = new StreamQueryRequest(query);
        QueryResponse response = request.process(solrServer);
        SolrDocumentList results = response.getResults();
        for (SolrDocument result : results.toArray(new SolrDocument[0])) {
            String conceptUri = (String) result.getFirstValue(conceptUriField);
            if (conceptUri == null) {
                throw new ClassifierException(String.format("Solr Core '%s' is missing required field '%s'.", solrCoreId, conceptUriField));
            }
            Float score = (Float) result.getFirstValue("score");
            // fetch metadata
            SolrQuery metadataQuery = new SolrQuery("*:*");
            // use filter queries to leverage the Solr cache explicitly
            metadataQuery.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
            metadataQuery.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptUri));
            metadataQuery.setFields(conceptUriField, broaderField, primaryTopicUriField);
            SolrDocumentList metadataResults = solrServer.query(metadataQuery).getResults();
            if (metadataResults.isEmpty()) {
                // report the inconsistent index explicitly instead of failing
                // with an IndexOutOfBoundsException on get(0)
                throw new ClassifierException(String.format("Solr Core '%s' has no metadata entry for concept '%s'.", solrCoreId, conceptUri));
            }
            SolrDocument metadata = metadataResults.get(0);
            String primaryTopicUri = (String) metadata.getFirstValue(primaryTopicUriField);
            suggestedTopics.add(new TopicSuggestion(conceptUri, primaryTopicUri, metadata.getFieldValues(broaderField), score));
        }
    } catch (SolrServerException e) {
        // the exception may have been created without a cause: guard against
        // a NullPointerException that would hide the original Solr error
        Throwable cause = e.getCause();
        if (cause != null && "unknown handler: /mlt".equals(cause.getMessage())) {
            String message = String.format("SolrServer with id '%s' for topic engine '%s' lacks" + " configuration for the MoreLikeThisHandler", solrCoreId, engineName);
            throw new ClassifierException(message, e);
        } else {
            throw new ClassifierException(e);
        }
    }
    if (suggestedTopics.size() <= 1) {
        // no need to apply the cutting heuristic
        return suggestedTopics;
    }
    // filter out suggestions that are less than some threshold based on the mean of the top scores
    float mean = 0.0f;
    for (TopicSuggestion suggestion : suggestedTopics) {
        mean += suggestion.score / suggestedTopics.size();
    }
    float threshold = 0.25f * suggestedTopics.get(0).score + 0.75f * mean;
    List<TopicSuggestion> filteredSuggestions = new ArrayList<TopicSuggestion>();
    for (TopicSuggestion suggestion : suggestedTopics) {
        if (filteredSuggestions.size() >= MAX_SUGGESTIONS) {
            return filteredSuggestions;
        }
        // the first (top) suggestion is always kept, the others only while
        // their score stays above the threshold
        if (filteredSuggestions.isEmpty() || suggestion.score > threshold) {
            filteredSuggestions.add(suggestion);
        } else {
            break;
        }
    }
    return filteredSuggestions;
}
Use of org.apache.solr.client.solrj.SolrServerException in project stanbol by apache.
The class SolrTrainingSet, method hasChangedSince.
@Override
/**
 * Checks whether the active Solr server holds at least one document whose
 * modification date lies in the range from the reference date onwards. If
 * topics are parsed, the check is restricted to documents matching at least
 * one of them.
 *
 * @param topics the topic URIs to restrict the check to; <code>null</code>
 *        or an empty list means no restriction
 * @param referenceDate lower bound of the modification date range
 * @return <code>true</code> if the query returns at least one document
 * @throws TrainingSetException if the Solr query fails
 */
public boolean hasChangedSince(List<String> topics, Date referenceDate) throws TrainingSetException {
    final String utcIsoDate = UTCTimeStamper.utcIsoString(referenceDate);
    StringBuilder queryText = new StringBuilder();
    queryText.append(modificationDateField).append(":[").append(utcIsoDate).append(" TO *]");
    if (topics != null && !topics.isEmpty()) {
        List<String> topicClauses = new ArrayList<String>();
        for (String topic : topics) {
            // escape special Solr query characters contained in the topic URI
            topicClauses.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        queryText.append(" AND (").append(StringUtils.join(topicClauses, " OR ")).append(")");
    }
    SolrQuery query = new SolrQuery(queryText.toString());
    // a single hit is enough to answer the question
    query.setRows(1);
    query.setFields(exampleIdField);
    try {
        return getActiveSolrServer().query(query).getResults().size() > 0;
    } catch (SolrServerException e) {
        String msg = String.format("Error while fetching topics for examples modified after '%s' on Solr Core '%s'.", utcIsoDate, solrCoreId);
        throw new TrainingSetException(msg, e);
    }
}
Use of org.apache.solr.client.solrj.SolrServerException in project stanbol by apache.
The class SolrTrainingSet, method getExamples.
/**
 * Fetches one batch of examples from the active Solr server, sorted by
 * ascending example id.
 * <ul>
 * <li>empty <code>topics</code>: all documents are queried</li>
 * <li><code>positive == true</code>: documents matching at least one topic</li>
 * <li><code>positive == false</code>: documents matching none of the topics</li>
 * </ul>
 * One more row than <code>batchSize</code> is requested; the id of that extra
 * row, if present, becomes the offset of the next batch. Documents without a
 * value for the text field count towards the batch size but are not added to
 * the returned items.
 *
 * @param topics the topic URIs used to select the examples
 * @param offset example id to start from (used as inclusive lower bound of a
 *        range on the example id field) or <code>null</code> to start at the
 *        beginning
 * @param positive whether to select examples of the topics or examples of
 *        none of the topics
 * @return the batch of examples together with the offset of the next batch
 * @throws TrainingSetException if the Solr query fails
 */
protected Batch<Example> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
    List<Example> items = new ArrayList<Example>();
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    List<String> parts = new ArrayList<String>();
    StringBuilder q = new StringBuilder();
    if (topics.isEmpty()) {
        q.append("*:*");
    } else if (positive) {
        for (String topic : topics) {
            parts.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        // the OR group needs parentheses only when it is combined with the
        // offset clause appended below
        if (offset != null) {
            q.append("(");
        }
        q.append(StringUtils.join(parts, " OR "));
        if (offset != null) {
            q.append(")");
        }
    } else {
        for (String topic : topics) {
            parts.add("-" + topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        q.append(StringUtils.join(parts, " AND "));
    }
    if (offset != null) {
        q.append(" AND ").append(exampleIdField).append(":[").append(offset.toString()).append(" TO *]");
    }
    query.setQuery(q.toString());
    query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
    // fetch one extra row to find out whether there is a next batch
    query.set("rows", batchSize + 1);
    String nextExampleId = null;
    try {
        int count = 0;
        QueryResponse response = solrServer.query(query);
        for (SolrDocument result : response.getResults()) {
            if (count == batchSize) {
                // the extra row: only its id is needed as next offset
                nextExampleId = result.getFirstValue(exampleIdField).toString();
            } else {
                count++;
                String exampleId = result.getFirstValue(exampleIdField).toString();
                Collection<Object> labelValues = result.getFieldValues(topicUrisField);
                Collection<Object> textValues = result.getFieldValues(exampleTextField);
                if (textValues == null) {
                    continue;
                }
                items.add(new Example(exampleId, labelValues, textValues));
            }
        }
    } catch (SolrServerException e) {
        // report the polarity that was actually requested (the message used to
        // say "positive" even for negative example queries)
        String msg = String.format("Error while fetching %s examples for topics ['%s'] on Solr Core '%s'.", positive ? "positive" : "negative", StringUtils.join(topics, "', '"), solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return new Batch<Example>(items, nextExampleId != null, nextExampleId);
}
Aggregations