Search in sources :

Example 86 with SolrServerException

Use of org.apache.solr.client.solrj.SolrServerException in the Apache Stanbol project.

Class SolrYard, method update.

/**
 * Updates the parsed representations that are already present in the index.
 * <p>
 * The ids of all non-null representations are collected and handed to
 * {@code checkRepresentations}; only representations whose id is contained in
 * the returned set are converted to Solr input documents and sent to the
 * server. {@code null} elements and representations not contained in that set
 * are silently skipped.
 *
 * @param representations the representations to update; must not be {@code null}
 * @return the representations for which an input document was created
 * @throws YardException if checking for existing documents or sending the
 *         update to the Solr server fails
 * @throws IllegalArgumentException if {@code representations} is {@code null}
 */
@Override
public final Iterable<Representation> update(Iterable<Representation> representations) throws YardException, IllegalArgumentException, NullPointerException {
    if (representations == null) {
        throw new IllegalArgumentException("The parsed Iterable over Representations MUST NOT be NULL!");
    }
    final long startTime = System.currentTimeMillis();

    // Collect the ids of every non-null representation that was passed in.
    final Set<String> requestedIds = new HashSet<String>();
    for (Representation candidate : representations) {
        if (candidate == null) {
            continue;
        }
        requestedIds.add(candidate.getId());
    }
    // A closed yard is only reported, the update is still attempted.
    if (closed) {
        log.warn("The SolrYard '{}' was already closed!", config.getName());
    }
    // Only used for the summary log statement at the end.
    final int requestedCount = requestedIds.size();

    // Narrow the requested ids down to those returned by checkRepresentations.
    final Set<String> indexedIds;
    try {
        indexedIds = checkRepresentations(requestedIds);
    } catch (SolrServerException e) {
        throw new YardException("Error while searching for alredy present documents " + "before executing the actual update for the parsed Representations", e);
    } catch (IOException e) {
        throw new YardException("Unable to access SolrServer", e);
    }
    final long checkedTime = System.currentTimeMillis();

    // Build one input document per representation that passed the check.
    final List<SolrInputDocument> documents = new ArrayList<SolrInputDocument>(indexedIds.size());
    final List<Representation> updatedRepresentations = new ArrayList<Representation>();
    for (Representation candidate : representations) {
        if (candidate == null || !indexedIds.contains(candidate.getId())) {
            continue;
        }
        documents.add(createSolrInputDocument(candidate));
        updatedRepresentations.add(candidate);
    }
    final long createdTime = System.currentTimeMillis();

    if (!documents.isEmpty()) {
        final UpdateRequest request = new UpdateRequest();
        // Without an immediate commit the server is asked to commit within the configured time.
        if (!immediateCommit) {
            request.setCommitWithin(commitWithin);
        }
        request.add(documents);
        try {
            AccessController.doPrivileged(new PrivilegedExceptionAction<Object>() {

                public UpdateResponse run() throws IOException, SolrServerException {
                    request.process(server);
                    if (immediateCommit) {
                        server.commit();
                    }
                    return null;
                }
            });
        } catch (PrivilegedActionException pae) {
            final Exception cause = pae.getException();
            if (cause instanceof SolrServerException) {
                throw new YardException("Error while adding updated Documents to the SolrServer", cause);
            }
            if (cause instanceof IOException) {
                throw new YardException("Unable to access SolrServer", cause);
            }
            // Anything else is rethrown as the RuntimeException it is expected to be.
            throw RuntimeException.class.cast(cause);
        }
    }
    final long readyTime = System.currentTimeMillis();
    log.info(String.format("Processed updateRequest for %d documents (%d in index " + "| %d updated) in %dms (checked %dms|created %dms| stored%dms)", requestedCount, indexedIds.size(), updatedRepresentations.size(), readyTime - startTime, checkedTime - startTime, createdTime - checkedTime, readyTime - createdTime));
    return updatedRepresentations;
}
Also used : UpdateRequest(org.apache.solr.client.solrj.request.UpdateRequest) PrivilegedActionException(java.security.PrivilegedActionException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) Representation(org.apache.stanbol.entityhub.servicesapi.model.Representation) IOException(java.io.IOException) Constraint(org.apache.stanbol.entityhub.servicesapi.query.Constraint) UpdateResponse(org.apache.solr.client.solrj.response.UpdateResponse) SolrInputDocument(org.apache.solr.common.SolrInputDocument) YardException(org.apache.stanbol.entityhub.servicesapi.yard.YardException) HashSet(java.util.HashSet)

Example 87 with SolrServerException

Use of org.apache.solr.client.solrj.SolrServerException in the Apache Stanbol project.

Class TopicClassificationEngine, method updatePerformanceEstimates.

/**
 * Runs the cross-validation folds and optimizes the active Solr index afterwards.
 * <p>
 * The training set is optimized first, then {@code performCVFold} is invoked
 * once per iteration. The evaluation directory is deleted and the
 * {@code evaluationRunning} flag is reset when the run ends, whether it
 * succeeded or failed.
 *
 * @param incremental passed through unchanged to {@code performCVFold}
 * @return the value returned by the last {@code performCVFold} invocation
 * @throws ClassifierException if another evaluation is already running, or if a
 *         configuration, I/O or Solr error occurs during the evaluation
 * @throws TrainingSetException declared for failures raised by the training set
 */
public synchronized int updatePerformanceEstimates(boolean incremental) throws ClassifierException, TrainingSetException {
    checkTrainingSet();
    if (evaluationRunning) {
        throw new ClassifierException("Another evaluation is already running");
    }
    // 3-folds CV is hardcoded for now
    final int foldCount = 3;
    // Separate from the fold count so the number of folds actually run can be limited.
    final int iterationCount = 3;
    int lastFoldUpdates = 0;
    try {
        evaluationRunning = true;
        // The training set is used intensively below: make sure its index is
        // packed and its statistics are up to date.
        getTrainingSet().optimize();
        int fold = 0;
        while (fold < iterationCount) {
            lastFoldUpdates = performCVFold(fold, foldCount, iterationCount, incremental);
            fold++;
        }
        getActiveSolrServer().optimize();
    } catch (ConfigurationException e) {
        throw new ClassifierException(e);
    } catch (IOException e) {
        throw new ClassifierException(e);
    } catch (SolrServerException e) {
        throw new ClassifierException(e);
    } finally {
        FileUtils.deleteQuietly(__evaluationServerDir);
        evaluationRunning = false;
    }
    return lastFoldUpdates;
}
Also used : ConfigurationException(org.osgi.service.cm.ConfigurationException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 88 with SolrServerException

Use of org.apache.solr.client.solrj.SolrServerException in the Apache Stanbol project.

Class TopicClassificationEngine, method suggestTopics.

/**
 * Suggests topics for the given text using a MoreLikeThis query against the
 * active Solr server.
 * <p>
 * Up to {@code MAX_SUGGESTIONS * 3} model entries are requested. For each hit
 * the matching metadata entry is looked up to obtain the primary topic URI and
 * the broader concepts. If more than one suggestion was found, the list is cut
 * at the first suggestion whose score is not above
 * {@code 0.25 * firstScore + 0.75 * meanScore}; the first suggestion is always
 * kept and at most {@code MAX_SUGGESTIONS} are returned.
 *
 * @param text the text to classify, sent as the stream body of the query
 * @return the suggested topics, in the order returned by Solr
 * @throws ClassifierException if the Solr request fails, the MoreLikeThis
 *         handler is not configured, a result lacks the concept URI field, or
 *         no metadata entry exists for a suggested concept
 */
public List<TopicSuggestion> suggestTopics(String text) throws ClassifierException {
    List<TopicSuggestion> suggestedTopics = new ArrayList<TopicSuggestion>(MAX_SUGGESTIONS * 3);
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    query.setRequestHandler("/" + MoreLikeThisParams.MLT);
    query.setFilterQueries(entryTypeField + ":" + MODEL_ENTRY);
    query.set(MoreLikeThisParams.MATCH_INCLUDE, false);
    query.set(MoreLikeThisParams.MIN_DOC_FREQ, 1);
    query.set(MoreLikeThisParams.MIN_TERM_FREQ, 1);
    query.set(MoreLikeThisParams.MAX_QUERY_TERMS, 30);
    query.set(MoreLikeThisParams.MAX_NUM_TOKENS_PARSED, 10000);
    // TODO: find a way to parse the interesting terms and report them
    // for debugging / explanation in dedicated RDF data structure.
    // query.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
    query.set(MoreLikeThisParams.SIMILARITY_FIELDS, similarityField);
    query.set(CommonParams.STREAM_BODY, text);
    // over query the number of suggestions to find a statistical cut based on the curve of the scores of
    // the top suggestion
    query.setRows(MAX_SUGGESTIONS * 3);
    query.setFields(conceptUriField);
    query.setIncludeScore(true);
    try {
        StreamQueryRequest request = new StreamQueryRequest(query);
        QueryResponse response = request.process(solrServer);
        SolrDocumentList results = response.getResults();
        for (SolrDocument result : results) {
            String conceptUri = (String) result.getFirstValue(conceptUriField);
            if (conceptUri == null) {
                throw new ClassifierException(String.format("Solr Core '%s' is missing required field '%s'.", solrCoreId, conceptUriField));
            }
            Float score = (Float) result.getFirstValue("score");
            // fetch metadata
            SolrQuery metadataQuery = new SolrQuery("*:*");
            // use filter queries to leverage the Solr cache explicitly
            metadataQuery.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
            metadataQuery.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptUri));
            metadataQuery.setFields(conceptUriField, broaderField, primaryTopicUriField);
            SolrDocumentList metadataResults = solrServer.query(metadataQuery).getResults();
            if (metadataResults.isEmpty()) {
                // Report the inconsistent index explicitly instead of failing
                // with an IndexOutOfBoundsException on get(0).
                throw new ClassifierException(String.format("Solr Core '%s' has no metadata entry for concept '%s'.", solrCoreId, conceptUri));
            }
            SolrDocument metadata = metadataResults.get(0);
            String primaryTopicUri = (String) metadata.getFirstValue(primaryTopicUriField);
            suggestedTopics.add(new TopicSuggestion(conceptUri, primaryTopicUri, metadata.getFieldValues(broaderField), score));
        }
    } catch (SolrServerException e) {
        // The cause may be absent: guard against it so the original Solr
        // failure is not replaced by a NullPointerException.
        Throwable cause = e.getCause();
        if (cause != null && "unknown handler: /mlt".equals(cause.getMessage())) {
            String message = String.format("SolrServer with id '%s' for topic engine '%s' lacks" + " configuration for the MoreLikeThisHandler", solrCoreId, engineName);
            throw new ClassifierException(message, e);
        } else {
            throw new ClassifierException(e);
        }
    }
    if (suggestedTopics.size() <= 1) {
        // no need to apply the cutting heuristic
        return suggestedTopics;
    }
    // filter out suggestions that are less than some threshold based on the mean of the top scores
    float mean = 0.0f;
    for (TopicSuggestion suggestion : suggestedTopics) {
        mean += suggestion.score / suggestedTopics.size();
    }
    // NOTE(review): uses the first suggestion as the top score, which presumes
    // Solr returned the results ordered by descending score - confirm.
    float threshold = 0.25f * suggestedTopics.get(0).score + 0.75f * mean;
    List<TopicSuggestion> filteredSuggestions = new ArrayList<TopicSuggestion>();
    for (TopicSuggestion suggestion : suggestedTopics) {
        if (filteredSuggestions.size() >= MAX_SUGGESTIONS) {
            return filteredSuggestions;
        }
        // The first suggestion is always kept; later ones must beat the threshold.
        if (filteredSuggestions.isEmpty() || suggestion.score > threshold) {
            filteredSuggestions.add(suggestion);
        } else {
            break;
        }
    }
    return filteredSuggestions;
}
Also used : StreamQueryRequest(org.apache.stanbol.commons.solr.utils.StreamQueryRequest) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) SolrDocumentList(org.apache.solr.common.SolrDocumentList) TopicSuggestion(org.apache.stanbol.enhancer.topic.api.TopicSuggestion) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SolrDocument(org.apache.solr.common.SolrDocument) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 89 with SolrServerException

Use of org.apache.solr.client.solrj.SolrServerException in the Apache Stanbol project.

Class SolrTrainingSet, method hasChangedSince.

/**
 * Checks whether the index contains at least one example modified at or after
 * the reference date, optionally restricted to the given topics.
 *
 * @param topics topic URIs to restrict the check to; {@code null} or an empty
 *        list means no topic restriction
 * @param referenceDate lower bound of the modification date range
 * @return {@code true} if the query matches at least one document
 * @throws TrainingSetException if the Solr query fails
 */
@Override
public boolean hasChangedSince(List<String> topics, Date referenceDate) throws TrainingSetException {
    final String utcIsoDate = UTCTimeStamper.utcIsoString(referenceDate);

    // Open-ended range on the modification date field.
    final StringBuilder queryText = new StringBuilder();
    queryText.append(modificationDateField).append(":[").append(utcIsoDate).append(" TO *]");

    final boolean restrictToTopics = topics != null && !topics.isEmpty();
    if (restrictToTopics) {
        // One clause per topic; the topic value is escaped so that special
        // Solr query characters in it are not interpreted.
        final List<String> topicClauses = new ArrayList<String>();
        for (String topic : topics) {
            topicClauses.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        queryText.append(" AND (").append(StringUtils.join(topicClauses, " OR ")).append(")");
    }

    // A single row with only the id field is enough to know whether anything matched.
    final SolrQuery query = new SolrQuery(queryText.toString());
    query.setRows(1);
    query.setFields(exampleIdField);
    try {
        final int matches = getActiveSolrServer().query(query).getResults().size();
        return matches > 0;
    } catch (SolrServerException e) {
        throw new TrainingSetException(String.format("Error while fetching topics for examples modified after '%s' on Solr Core '%s'.", utcIsoDate, solrCoreId), e);
    }
}
Also used : SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)

Example 90 with SolrServerException

Use of org.apache.solr.client.solrj.SolrServerException in the Apache Stanbol project.

Class SolrTrainingSet, method getExamples.

/**
 * Fetches one batch of training examples, ordered by example id.
 * <p>
 * With {@code positive == true} the examples must carry at least one of the
 * given topics; otherwise they must carry none of them. A {@code null} or
 * empty topic list selects all examples. One row more than {@code batchSize}
 * is requested: if it is returned, its id is reported as the offset of the
 * next batch and it is not part of the current one.
 *
 * @param topics topic URIs to select (or exclude); {@code null} or empty for
 *        no topic restriction
 * @param offset lower bound (inclusive) for the example id, or {@code null}
 *        to start from the beginning
 * @param positive {@code true} to select examples having one of the topics,
 *        {@code false} to select examples having none of them
 * @return the batch of examples together with the id of the next example, if any
 * @throws TrainingSetException if the Solr query fails
 */
protected Batch<Example> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
    List<Example> items = new ArrayList<Example>();
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    List<String> parts = new ArrayList<String>();
    StringBuilder q = new StringBuilder();
    // A null topic list is handled like an empty one, consistent with hasChangedSince.
    if (topics == null || topics.isEmpty()) {
        q.append("*:*");
    } else if (positive) {
        for (String topic : topics) {
            parts.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        String anyTopic = StringUtils.join(parts, " OR ");
        // Group the OR clauses only when the offset clause is ANDed after them.
        if (offset != null) {
            q.append("(").append(anyTopic).append(")");
        } else {
            q.append(anyTopic);
        }
    } else {
        for (String topic : topics) {
            parts.add("-" + topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        q.append(StringUtils.join(parts, " AND "));
    }
    if (offset != null) {
        // NOTE(review): the offset is inserted into the range query unescaped - confirm
        // that example ids cannot contain whitespace or ']' characters.
        q.append(" AND ").append(exampleIdField).append(":[").append(offset.toString()).append(" TO *]");
    }
    query.setQuery(q.toString());
    query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
    // Ask for one extra row to find out whether a next batch exists.
    query.set("rows", batchSize + 1);
    String nextExampleId = null;
    try {
        int count = 0;
        QueryResponse response = solrServer.query(query);
        for (SolrDocument result : response.getResults()) {
            if (count == batchSize) {
                // The extra row: only its id is kept, as the start of the next batch.
                nextExampleId = result.getFirstValue(exampleIdField).toString();
            } else {
                count++;
                String exampleId = result.getFirstValue(exampleIdField).toString();
                Collection<Object> labelValues = result.getFieldValues(topicUrisField);
                Collection<Object> textValues = result.getFieldValues(exampleTextField);
                // Documents without text are counted but not returned.
                if (textValues == null) {
                    continue;
                }
                items.add(new Example(exampleId, labelValues, textValues));
            }
        }
    } catch (SolrServerException e) {
        // Name the polarity that was actually requested.
        String msg = String.format("Error while fetching %s examples for topics ['%s'] on Solr Core '%s'.", positive ? "positive" : "negative", StringUtils.join(topics, "', '"), solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return new Batch<Example>(items, nextExampleId != null, nextExampleId);
}
Also used : SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SolrDocument(org.apache.solr.common.SolrDocument) Batch(org.apache.stanbol.enhancer.topic.api.Batch) Example(org.apache.stanbol.enhancer.topic.api.training.Example) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)

Aggregations

SolrServerException (org.apache.solr.client.solrj.SolrServerException)281 IOException (java.io.IOException)210 SolrQuery (org.apache.solr.client.solrj.SolrQuery)101 QueryResponse (org.apache.solr.client.solrj.response.QueryResponse)97 ArrayList (java.util.ArrayList)58 SolrException (org.apache.solr.common.SolrException)57 SolrDocument (org.apache.solr.common.SolrDocument)55 SolrInputDocument (org.apache.solr.common.SolrInputDocument)50 SolrDocumentList (org.apache.solr.common.SolrDocumentList)44 HashMap (java.util.HashMap)30 Map (java.util.Map)29 List (java.util.List)27 UpdateResponse (org.apache.solr.client.solrj.response.UpdateResponse)26 SolrClient (org.apache.solr.client.solrj.SolrClient)23 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)23 NamedList (org.apache.solr.common.util.NamedList)22 UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)19 Date (java.util.Date)18 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)17 SolrParams (org.apache.solr.common.params.SolrParams)13