use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class TopicClassificationEngine method batchOverTopics.
/**
 * Iterates over all the topic metadata entries of the active Solr core, sorted by ascending concept
 * URI, and hands them to the given processor in batches of at most 1000 documents. The Solr server is
 * committed after each batch and optimized once all the batches have been processed.
 *
 * @param processor
 *            the callback invoked once per batch of metadata documents
 * @return the sum of the counts reported by the processor for each batch
 * @throws TrainingSetException
 *             if querying, processing or committing fails for any reason
 */
protected int batchOverTopics(BatchProcessor<SolrDocument> processor) throws TrainingSetException {
    // TODO: implement incremental update by using the date informations
    int processedCount = 0;
    SolrServer solrServer = getActiveSolrServer();
    String metadataFilter = entryTypeField + ":" + METADATA_ENTRY;
    SolrQuery query = new SolrQuery("*:*");
    query.setFilterQueries(metadataFilter);
    int batchSize = 1000;
    query.addSortField(conceptUriField, SolrQuery.ORDER.asc);
    // ask for one extra row: it is not processed in the current batch, its only purpose is to tell
    // whether there is a next batch and where that batch starts
    query.setRows(batchSize + 1);
    try {
        boolean done = false;
        while (!done) {
            // batch over all the indexed topics
            QueryResponse response = solrServer.query(query);
            String nextOffset = null;
            List<SolrDocument> batchDocuments = new ArrayList<SolrDocument>();
            for (SolrDocument result : response.getResults()) {
                if (batchDocuments.size() == batchSize) {
                    // look-ahead row: first document of the next batch
                    nextOffset = result.getFirstValue(conceptUriField).toString();
                    break;
                }
                batchDocuments.add(result);
            }
            processedCount += processor.process(batchDocuments);
            solrServer.commit();
            if (nextOffset == null) {
                // No look-ahead row was returned: this was the last batch. Testing the look-ahead
                // row (instead of the batch size) also terminates when the last batch is exactly
                // full.
                done = true;
            } else {
                // replace (rather than accumulate) the range filter so that the query does not grow
                // by one filter per batch; the range is inclusive so the look-ahead row is the
                // first row of the next batch
                query.setFilterQueries(metadataFilter,
                    conceptUriField + ":[" + ClientUtils.escapeQueryChars(nextOffset) + " TO *]");
            }
        }
        solrServer.optimize();
    } catch (Exception e) {
        String msg = String.format("Error while updating topics on Solr Core '%s'.", solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return processedCount;
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class TopicClassificationEngine method removeAllConcepts.
/**
 * Deletes every document matching {@code *:*} from the active Solr server and commits the deletion.
 *
 * @throws ClassifierException
 *             if the deletion or the commit fails for any reason
 */
@Override
public void removeAllConcepts() throws ClassifierException {
    final SolrServer activeServer = getActiveSolrServer();
    try {
        activeServer.deleteByQuery("*:*");
        activeServer.commit();
    } catch (Exception failure) {
        throw new ClassifierException(
            String.format("Error deleting concepts from Solr Core '%s'", solrCoreId), failure);
    }
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class TopicClassificationEngine method suggestTopics.
/**
 * Suggests topics for the given text by sending it as the stream body of a MoreLikeThis request
 * restricted to the model entries of the active Solr server, then looking up the metadata entry of
 * each matching concept.
 * <p>
 * Up to {@code MAX_SUGGESTIONS * 3} candidates are fetched. When more than one candidate is found,
 * the list is cut at the first candidate whose score is not strictly above
 * {@code 0.25 * firstScore + 0.75 * meanScore}; the first candidate is always kept and at most
 * {@code MAX_SUGGESTIONS} suggestions are returned.
 *
 * @param text
 *            the text to classify
 * @return the retained suggestions, in the order returned by Solr (presumably by decreasing score)
 * @throws ClassifierException
 *             if the Solr request fails, if the MoreLikeThis handler is not configured, if a result
 *             lacks the concept URI field or if a suggested concept has no metadata entry
 */
public List<TopicSuggestion> suggestTopics(String text) throws ClassifierException {
    List<TopicSuggestion> suggestedTopics = new ArrayList<TopicSuggestion>(MAX_SUGGESTIONS * 3);
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    query.setRequestHandler("/" + MoreLikeThisParams.MLT);
    query.setFilterQueries(entryTypeField + ":" + MODEL_ENTRY);
    query.set(MoreLikeThisParams.MATCH_INCLUDE, false);
    query.set(MoreLikeThisParams.MIN_DOC_FREQ, 1);
    query.set(MoreLikeThisParams.MIN_TERM_FREQ, 1);
    query.set(MoreLikeThisParams.MAX_QUERY_TERMS, 30);
    query.set(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, 10000);
    // TODO: find a way to parse the interesting terms and report them
    // for debugging / explanation in dedicated RDF data structure.
    // query.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
    query.set(MoreLikeThisParams.SIMILARITY_FIELDS, similarityField);
    query.set(CommonParams.STREAM_BODY, text);
    // over query the number of suggestions to find a statistical cut based on the curve of the scores of
    // the top suggestion
    query.setRows(MAX_SUGGESTIONS * 3);
    query.setFields(conceptUriField);
    query.setIncludeScore(true);
    try {
        StreamQueryRequest request = new StreamQueryRequest(query);
        QueryResponse response = request.process(solrServer);
        SolrDocumentList results = response.getResults();
        for (SolrDocument result : results) {
            String conceptUri = (String) result.getFirstValue(conceptUriField);
            if (conceptUri == null) {
                throw new ClassifierException(String.format("Solr Core '%s' is missing required field '%s'.", solrCoreId, conceptUriField));
            }
            Float score = (Float) result.getFirstValue("score");
            // fetch metadata
            SolrQuery metadataQuery = new SolrQuery("*:*");
            // use filter queries to leverage the Solr cache explicitly
            metadataQuery.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
            metadataQuery.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptUri));
            metadataQuery.setFields(conceptUriField, broaderField, primaryTopicUriField);
            SolrDocumentList metadataResults = solrServer.query(metadataQuery).getResults();
            if (metadataResults.isEmpty()) {
                // report the inconsistent index explicitly instead of failing with an unchecked
                // IndexOutOfBoundsException
                throw new ClassifierException(String.format("Solr Core '%s' has no metadata entry for concept '%s'.", solrCoreId, conceptUri));
            }
            SolrDocument metadata = metadataResults.get(0);
            String primaryTopicUri = (String) metadata.getFirstValue(primaryTopicUriField);
            suggestedTopics.add(new TopicSuggestion(conceptUri, primaryTopicUri, metadata.getFieldValues(broaderField), score));
        }
    } catch (SolrServerException e) {
        // the cause may be missing: guard against it so that the original error is not masked by a
        // NullPointerException
        Throwable cause = e.getCause();
        if (cause != null && "unknown handler: /mlt".equals(cause.getMessage())) {
            String message = String.format("SolrServer with id '%s' for topic engine '%s' lacks" + " configuration for the MoreLikeThisHandler", solrCoreId, engineName);
            throw new ClassifierException(message, e);
        } else {
            throw new ClassifierException(e);
        }
    }
    if (suggestedTopics.size() <= 1) {
        // no need to apply the cutting heuristic
        return suggestedTopics;
    }
    // filter out suggestions that are less than some threshold based on the mean of the top scores
    float mean = 0.0f;
    for (TopicSuggestion suggestion : suggestedTopics) {
        mean += suggestion.score / suggestedTopics.size();
    }
    float threshold = 0.25f * suggestedTopics.get(0).score + 0.75f * mean;
    List<TopicSuggestion> filteredSuggestions = new ArrayList<TopicSuggestion>();
    for (TopicSuggestion suggestion : suggestedTopics) {
        if (filteredSuggestions.size() >= MAX_SUGGESTIONS) {
            return filteredSuggestions;
        }
        // the first suggestion is always kept, the list is cut at the first score under the threshold
        if (filteredSuggestions.isEmpty() || suggestion.score > threshold) {
            filteredSuggestions.add(suggestion);
        } else {
            break;
        }
    }
    return filteredSuggestions;
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class TopicClassificationEngine method invalidateModelFields.
/*
 * Re-submits the metadata entry of each given concept with the listed fields left out. Nothing is
 * done when either the concept collection or the field list is empty.
 *
 * The commit is the responsibility of the caller.
 */
protected void invalidateModelFields(Collection<String> conceptIds, String... fieldNames) throws ClassifierException {
    if (conceptIds.isEmpty() || fieldNames.length == 0) {
        return;
    }
    SolrServer activeServer = getActiveSolrServer();
    List<String> droppedFields = Arrays.asList(fieldNames);
    try {
        UpdateRequest update = new UpdateRequest();
        for (String conceptId : conceptIds) {
            SolrQuery lookup = new SolrQuery("*:*");
            lookup.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
            lookup.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId));
            // there should be only one matching entry (or none: tolerated)
            for (SolrDocument existing : activeServer.query(lookup).getResults()) {
                SolrInputDocument replacement = new SolrInputDocument();
                for (String name : existing.getFieldNames()) {
                    if (droppedFields.contains(name)) {
                        // invalidated field: not copied over to the new entry
                        continue;
                    }
                    replacement.setField(name, existing.getFieldValues(name));
                }
                update.add(replacement);
            }
        }
        // only contact the server when there is at least one document to send
        boolean hasDocuments = update.getDocuments() != null && update.getDocuments().size() > 0;
        if (hasDocuments) {
            activeServer.request(update);
        }
    } catch (Exception failure) {
        throw new ClassifierException(
            String.format("Error invalidating topics [%s] on Solr Core '%s'", StringUtils.join(conceptIds, ", "), solrCoreId),
            failure);
    }
}
use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.
the class ConfiguredSolrCoreTracker method configureSolrCore.
/**
 * Configures the Solr server used by this component from the given configuration.
 * <p>
 * When the value of {@code solrCoreProperty} is a {@link SolrServer} instance, it is bound directly
 * together with the default Solr core config. Otherwise the value is interpreted as the id of a Solr
 * core (falling back to {@code defaultCoreId} when missing or blank) and a
 * {@code RegisteredSolrServerTracker} is opened on the matching index reference; this requires a
 * component context.
 *
 * @param config
 *            the component configuration
 * @param solrCoreProperty
 *            the name of the property holding either a {@link SolrServer} or a Solr core id
 * @param defaultCoreId
 *            the core id used when the property is missing or blank
 * @param solrCoreConfigProperty
 *            the name of the property holding the Solr core config name; defaults to the core id
 *            followed by {@code ".solrindex.zip"}
 * @throws ConfigurationException
 *             if no component context is available for a non {@link SolrServer} value, or if the
 *             index initialization or tracking fails
 */
protected void configureSolrCore(Dictionary<String, Object> config, String solrCoreProperty, String defaultCoreId, String solrCoreConfigProperty) throws ConfigurationException {
    Object solrCoreInfo = config.get(solrCoreProperty);
    if (solrCoreInfo instanceof SolrServer) {
        // Bind a fixed Solr server client instead of doing dynamic OSGi lookup using the service
        // tracker. This can be useful for unit-testing.
        solrServer = (SolrServer) solrCoreInfo;
        solrCoreConfig = TopicClassificationEngine.DEFAULT_SOLR_CORE_CONFIG;
    } else {
        if (context == null) {
            // report the value that was actually received: the solrCoreId field has not been
            // assigned from this configuration yet
            throw new ConfigurationException(solrCoreProperty, solrCoreProperty + " should be a SolrServer instance for using" + " the engine without any OSGi context. Got: " + solrCoreInfo);
        }
        if (solrCoreInfo != null && !solrCoreInfo.toString().trim().isEmpty()) {
            this.solrCoreId = solrCoreInfo.toString().trim();
        } else {
            this.solrCoreId = defaultCoreId;
        }
        solrCoreConfig = getRequiredStringParam(config, solrCoreConfigProperty, this.solrCoreId + ".solrindex.zip");
        try {
            IndexReference indexReference = IndexReference.parse(solrCoreId);
            indexReference = checkInitSolrIndex(indexReference, solrCoreConfig);
            // track the solr core OSGi updates
            indexTracker = new RegisteredSolrServerTracker(context.getBundleContext(), indexReference);
            indexTracker.open();
        } catch (Exception e) {
            throw new ConfigurationException(solrCoreProperty, e.getMessage(), e);
        }
    }
}
Aggregations