Search in sources :

Example 11 with SolrServer

use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.

the class TopicClassificationEngine method updatePerformanceMetadata.

/**
 * Fold the results of one evaluation run into the stored metadata entry of a topic.
 * <p>
 * Each metadata entry matching {@code conceptId} is read back from the active Solr server, the new
 * precision / recall values and the misclassified examples are merged in through {@code addToList},
 * the support counts through {@code increment}, the evaluation date is refreshed and the resulting
 * document is re-submitted. No commit is issued here: committing is left to the caller.
 *
 * @param conceptId identifier of the topic whose metadata entry is updated
 * @param precision precision value to record
 * @param recall recall value to record
 * @param positiveSupport amount added to the positive support field
 * @param negativeSupport amount added to the negative support field
 * @param falsePositiveExamples examples to record in the false positives field
 * @param falseNegativeExamples examples to record in the false negatives field
 * @throws ClassifierException wrapping any exception raised while querying or updating the index
 */
protected void updatePerformanceMetadata(String conceptId, float precision, float recall, int positiveSupport, int negativeSupport, List<String> falsePositiveExamples, List<String> falseNegativeExamples) throws ClassifierException {
    SolrServer solrServer = getActiveSolrServer();
    try {
        SolrQuery metadataQuery = new SolrQuery("*:*");
        metadataQuery.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
        metadataQuery.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId));
        // Zero or one entry is expected per concept; any additional match is updated the same way.
        for (SolrDocument storedEntry : solrServer.query(metadataQuery).getResults()) {
            // Start from the values currently stored (all metadata fields are assumed to be stored).
            Map<String, Collection<Object>> merged = new HashMap<String, Collection<Object>>();
            for (String name : storedEntry.getFieldNames()) {
                merged.put(name, storedEntry.getFieldValues(name));
            }

            // Merge the new evaluation results into the previous values.
            addToList(merged, precisionField, precision);
            addToList(merged, recallField, recall);
            increment(merged, positiveSupportField, positiveSupport);
            increment(merged, negativeSupportField, negativeSupport);
            addToList(merged, falsePositivesField, falsePositiveExamples);
            addToList(merged, falseNegativesField, falseNegativeExamples);

            // Rebuild a complete input document and submit it in place of the stored one.
            SolrInputDocument updatedEntry = new SolrInputDocument();
            for (Map.Entry<String, Collection<Object>> field : merged.entrySet()) {
                updatedEntry.addField(field.getKey(), field.getValue());
            }
            updatedEntry.setField(modelEvaluationDateField, UTCTimeStamper.nowUtcDate());
            solrServer.add(updatedEntry);
        }
        String summary = String.format("Performance for concept '%s': precision=%f, recall=%f, positiveSupport=%d, negativeSupport=%d", conceptId, precision, recall, positiveSupport, negativeSupport);
        log.info(summary);
    } catch (Exception e) {
        throw new ClassifierException(String.format("Error updating performance metadata for topic '%s' on Solr Core '%s'", conceptId, solrCoreId), e);
    }
}
Also used : HashMap(java.util.HashMap) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ConfigurationException(org.osgi.service.cm.ConfigurationException) InvalidSyntaxException(org.osgi.framework.InvalidSyntaxException) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) ChainException(org.apache.stanbol.enhancer.servicesapi.ChainException) IOException(java.io.IOException) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrDocument(org.apache.solr.common.SolrDocument) Collection(java.util.Collection) Map(java.util.Map) HashMap(java.util.HashMap) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 12 with SolrServer

use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.

the class TopicClassificationEngine method updatePerformanceEstimates.

/**
 * Run the cross-validation folds that refresh the performance estimates of the model.
 * <p>
 * Only one evaluation may run at a time: the {@code evaluationRunning} flag is raised for the
 * duration of the run and always lowered again afterwards, at which point the evaluation server
 * directory is deleted as well.
 *
 * @param incremental forwarded unchanged to {@code performCVFold}
 * @return the value returned by the last call to {@code performCVFold}, or 0 if none completed
 * @throws ClassifierException if another evaluation is already running, or wrapping any
 *         configuration, I/O or Solr failure raised during the run
 * @throws TrainingSetException declared for failures coming from the training set
 */
public synchronized int updatePerformanceEstimates(boolean incremental) throws ClassifierException, TrainingSetException {
    checkTrainingSet();
    if (evaluationRunning) {
        throw new ClassifierException("Another evaluation is already running");
    }
    int updatedTopics = 0;
    try {
        evaluationRunning = true;
        // 3-fold cross-validation is hardcoded for now.
        final int cvFoldCount = 3;
        // Number of folds actually evaluated; may be lowered to shorten the evaluation.
        final int cvIterationCount = 3;

        // The training set is queried intensively below: make sure its index is packed and its
        // statistics are up to date first.
        getTrainingSet().optimize();

        int cvFoldIndex = 0;
        while (cvFoldIndex < cvIterationCount) {
            // Each fold overwrites the previous count: the last fold's value is the one returned.
            updatedTopics = performCVFold(cvFoldIndex, cvFoldCount, cvIterationCount, incremental);
            cvFoldIndex++;
        }

        getActiveSolrServer().optimize();
    } catch (ConfigurationException e) {
        throw new ClassifierException(e);
    } catch (IOException e) {
        throw new ClassifierException(e);
    } catch (SolrServerException e) {
        throw new ClassifierException(e);
    } finally {
        // Clean up whether the run succeeded or not.
        FileUtils.deleteQuietly(__evaluationServerDir);
        evaluationRunning = false;
    }
    return updatedTopics;
}
Also used : ConfigurationException(org.osgi.service.cm.ConfigurationException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) IOException(java.io.IOException) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 13 with SolrServer

use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.

the class SolrTrainingSet method getExamples.

/**
 * Fetch one batch of examples from the active Solr server, ordered by ascending example id.
 * <p>
 * The query selects:
 * <ul>
 * <li>every example when {@code topics} is empty,</li>
 * <li>examples labelled with at least one of {@code topics} when {@code positive} is true,</li>
 * <li>examples labelled with none of {@code topics} otherwise.</li>
 * </ul>
 * {@code batchSize + 1} rows are requested: the extra row, when present, is not returned but its id
 * becomes the offset of the next batch.
 *
 * @param topics topic ids used to select the examples; must not be null
 * @param offset inclusive lower bound on the example id, or null to start from the beginning
 * @param positive true to select examples of the topics, false to select examples of other topics
 * @return the batch of examples, flagged as having a successor when an extra row was found
 * @throws TrainingSetException wrapping any {@code SolrServerException} raised by the query
 */
protected Batch<Example> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
    List<Example> items = new ArrayList<Example>();
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    List<String> parts = new ArrayList<String>();
    StringBuilder q = new StringBuilder();
    if (topics.isEmpty()) {
        q.append("*:*");
    } else if (positive) {
        for (String topic : topics) {
            parts.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        // The OR group must be parenthesized when it is combined with the offset clause below.
        if (offset != null) {
            q.append("(");
        }
        q.append(StringUtils.join(parts, " OR "));
        if (offset != null) {
            q.append(")");
        }
    } else {
        for (String topic : topics) {
            parts.add("-" + topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        q.append(StringUtils.join(parts, " AND "));
    }
    if (offset != null) {
        q.append(" AND ").append(exampleIdField).append(":[").append(offset.toString()).append(" TO *]");
    }
    query.setQuery(q.toString());
    query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
    // Ask for one extra row to find out whether there is a next batch and where it starts.
    query.set("rows", batchSize + 1);
    String nextExampleId = null;
    try {
        int count = 0;
        QueryResponse response = solrServer.query(query);
        for (SolrDocument result : response.getResults()) {
            if (count == batchSize) {
                // Extra row: only remember its id as the starting point of the next batch.
                nextExampleId = result.getFirstValue(exampleIdField).toString();
            } else {
                count++;
                String exampleId = result.getFirstValue(exampleIdField).toString();
                Collection<Object> labelValues = result.getFieldValues(topicUrisField);
                Collection<Object> textValues = result.getFieldValues(exampleTextField);
                if (textValues == null) {
                    // Examples without text are skipped but still count towards the batch size.
                    continue;
                }
                items.add(new Example(exampleId, labelValues, textValues));
            }
        }
    } catch (SolrServerException e) {
        // Report the kind of examples that was actually requested, not always "positive".
        String msg = String.format("Error while fetching %s examples for topics ['%s'] on Solr Core '%s'.", positive ? "positive" : "negative", StringUtils.join(topics, "', '"), solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return new Batch<Example>(items, nextExampleId != null, nextExampleId);
}
Also used : SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SolrDocument(org.apache.solr.common.SolrDocument) Batch(org.apache.stanbol.enhancer.topic.api.Batch) Example(org.apache.stanbol.enhancer.topic.api.training.Example) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)

Example 14 with SolrServer

use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.

the class SolrTrainingSet method registerExample.

/**
 * Register, replace or remove a training example and commit the change.
 * <p>
 * When {@code text} is null the example identified by {@code exampleId} is deleted. Otherwise a
 * document holding the id, the text, the topics (when given) and the current UTC date is added to
 * the index; a random UUID is used as id when {@code exampleId} is null or empty.
 *
 * @param exampleId id of the example; may be null or empty when {@code text} is not null
 * @param text text of the example, or null to request the removal of the example
 * @param topics topic ids labelling the example; may be null
 * @return the id of the example that was added or removed
 * @throws IllegalArgumentException if both {@code exampleId} and {@code text} are null
 * @throws TrainingSetException wrapping any exception raised while updating the index
 */
@Override
public String registerExample(String exampleId, String text, List<String> topics) throws TrainingSetException {
    if (text == null) {
        // special case: example removal
        if (exampleId == null) {
            throw new IllegalArgumentException("exampleId and text should not be null simultaneously");
        }
        SolrServer solrServer = getActiveSolrServer();
        try {
            // Escape the id: left raw, query syntax characters in it (':', '*', whitespace, ...)
            // would either break the delete query or make it match unrelated documents.
            solrServer.deleteByQuery(exampleIdField + ":" + ClientUtils.escapeQueryChars(exampleId));
            solrServer.commit();
            return exampleId;
        } catch (Exception e) {
            String msg = String.format("Error deleting example with id '%s' on Solr Core '%s'", exampleId, solrCoreId);
            throw new TrainingSetException(msg, e);
        }
    }
    if (exampleId == null || exampleId.isEmpty()) {
        // No usable id supplied by the caller: generate one.
        exampleId = UUID.randomUUID().toString();
    }
    SolrInputDocument doc = new SolrInputDocument();
    doc.addField(exampleIdField, exampleId);
    doc.addField(exampleTextField, text);
    if (topics != null) {
        doc.addField(topicUrisField, topics);
    }
    doc.addField(modificationDateField, UTCTimeStamper.nowUtcDate());
    SolrServer server = getActiveSolrServer();
    try {
        server.add(doc);
        server.commit();
    } catch (Exception e) {
        String msg = String.format("Could not register example '%s' with topics: ['%s']", exampleId, StringUtils.join(topics, "', '"));
        throw new TrainingSetException(msg, e);
    }
    return exampleId;
}
Also used : SolrInputDocument(org.apache.solr.common.SolrInputDocument) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ConfigurationException(org.osgi.service.cm.ConfigurationException) InvalidSyntaxException(org.osgi.framework.InvalidSyntaxException) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)

Example 15 with SolrServer

use of org.apache.solr.client.solrj.SolrServer in project stanbol by apache.

the class SolrTrainingSet method hasChangedSince.

/**
 * Tell whether at least one example was modified on or after a reference date.
 * <p>
 * When {@code topics} is neither null nor empty, only examples labelled with at least one of the
 * given topics are considered.
 *
 * @param topics topic ids restricting the check; null or empty to consider every example
 * @param referenceDate inclusive lower bound on the modification date
 * @return true if the query matches at least one example
 * @throws TrainingSetException wrapping any {@code SolrServerException} raised by the query
 */
@Override
public boolean hasChangedSince(List<String> topics, Date referenceDate) throws TrainingSetException {
    String utcIsoDate = UTCTimeStamper.utcIsoString(referenceDate);
    String queryString = modificationDateField + ":[" + utcIsoDate + " TO *]";
    if (topics != null && !topics.isEmpty()) {
        List<String> topicClauses = new ArrayList<String>();
        for (String topic : topics) {
            // escape the topic id so that special Solr characters are matched literally
            topicClauses.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        queryString += " AND (" + StringUtils.join(topicClauses, " OR ") + ")";
    }
    SolrQuery query = new SolrQuery(queryString);
    // A single row holding only the id is enough to know whether anything matched.
    query.setRows(1);
    query.setFields(exampleIdField);
    try {
        SolrServer solrServer = getActiveSolrServer();
        int matches = solrServer.query(query).getResults().size();
        return matches > 0;
    } catch (SolrServerException e) {
        throw new TrainingSetException(String.format("Error while fetching topics for examples modified after '%s' on Solr Core '%s'.", utcIsoDate, solrCoreId), e);
    }
}
Also used : SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)

Aggregations

SolrServer (org.apache.solr.client.solrj.SolrServer)40 ManagedSolrServer (org.apache.stanbol.commons.solr.managed.ManagedSolrServer)21 SolrServerException (org.apache.solr.client.solrj.SolrServerException)18 EmbeddedSolrServer (org.apache.solr.client.solrj.embedded.EmbeddedSolrServer)17 ClassifierException (org.apache.stanbol.enhancer.topic.api.ClassifierException)13 IOException (java.io.IOException)12 SolrQuery (org.apache.solr.client.solrj.SolrQuery)12 ConfigurationException (org.osgi.service.cm.ConfigurationException)11 SolrDocument (org.apache.solr.common.SolrDocument)10 TrainingSetException (org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)10 InvalidSyntaxException (org.osgi.framework.InvalidSyntaxException)10 OakSolrConfiguration (org.apache.jackrabbit.oak.plugins.index.solr.configuration.OakSolrConfiguration)8 QueryResponse (org.apache.solr.client.solrj.response.QueryResponse)8 ArrayList (java.util.ArrayList)7 ChainException (org.apache.stanbol.enhancer.servicesapi.ChainException)7 EngineException (org.apache.stanbol.enhancer.servicesapi.EngineException)7 InvalidContentException (org.apache.stanbol.enhancer.servicesapi.InvalidContentException)7 EntityhubException (org.apache.stanbol.entityhub.servicesapi.EntityhubException)7 SolrInputDocument (org.apache.solr.common.SolrInputDocument)6 Test (org.junit.Test)6