Search in sources :

Example 96 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class CloudMLTQParser method parse.

/**
 * Builds a MoreLikeThis query seeded from the document whose id is supplied as the
 * parser's main value ({@code QueryParsing.V}).
 *
 * <p>Similarity fields are taken from the {@code qf} local param when it is present
 * (entries may carry per-field boosts); otherwise every field of the fetched document
 * that the schema reports as stored and explicitly analyzed is used. The seed document
 * is excluded from the result through a {@code MUST_NOT} clause on the unique key field.
 *
 * @return a boolean query combining the (optionally boosted) MLT query with the
 *         clause that excludes the seed document
 * @throws SolrException with {@code BAD_REQUEST} if the seed document cannot be fetched,
 *         if no similarity field is available, or if building the MLT query fails with
 *         an {@link IOException} (which is preserved as the cause)
 */
public Query parse() {
    String id = localParams.get(QueryParsing.V);
    // Do a Real Time Get for the document
    SolrDocument doc = getDocument(id);
    if (doc == null) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Error completing MLT request. Could not fetch " + "document with id [" + id + "]");
    }
    String[] qf = localParams.getParams("qf");
    Map<String, Float> boostFields = new HashMap<>();
    MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
    mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
    mlt.setMinDocFreq(localParams.getInt("mindf", 0));
    mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
    mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
    mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
    mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
    mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
    boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
    mlt.setBoost(boost);
    mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());
    Map<String, Collection<Object>> filteredDocument = new HashMap<>();
    String[] fieldNames;
    if (qf != null) {
        // Each qf value may itself hold several entries; split them and drop empty ones.
        ArrayList<String> fields = new ArrayList<>();
        for (String fieldName : qf) {
            if (!StringUtils.isEmpty(fieldName)) {
                String[] strings = splitList.split(fieldName);
                for (String string : strings) {
                    if (!StringUtils.isEmpty(string)) {
                        fields.add(string);
                    }
                }
            }
        }
        // Parse field names and boosts from the fields
        boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
        fieldNames = boostFields.keySet().toArray(new String[0]);
    } else {
        ArrayList<String> fields = new ArrayList<>();
        for (String field : doc.getFieldNames()) {
            // Only use fields that are stored and have an explicit analyzer.
            // This makes sense as the query uses tf/idf/.. for query construction.
            // We might want to relook and change this in the future though.
            SchemaField f = req.getSchema().getFieldOrNull(field);
            if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
                fields.add(field);
            }
        }
        fieldNames = fields.toArray(new String[0]);
    }
    if (fieldNames.length < 1) {
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires at least one similarity field: qf");
    }
    mlt.setFieldNames(fieldNames);
    // Copy the selected field values, turning IndexableField instances into their string value.
    for (String field : fieldNames) {
        Collection<Object> fieldValues = doc.getFieldValues(field);
        if (fieldValues != null) {
            Collection<Object> values = new ArrayList<>();
            for (Object val : fieldValues) {
                if (val instanceof IndexableField) {
                    values.add(((IndexableField) val).stringValue());
                } else {
                    values.add(val);
                }
            }
            filteredDocument.put(field, values);
        }
    }
    try {
        Query rawMLTQuery = mlt.like(filteredDocument);
        BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
        if (boost && !boostFields.isEmpty()) {
            // Rebuild the query, multiplying each clause's boost by the boost configured for its field.
            BooleanQuery.Builder newQ = new BooleanQuery.Builder();
            newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
            for (BooleanClause clause : boostedMLTQuery) {
                Query q = clause.getQuery();
                float originalBoost = 1f;
                if (q instanceof BoostQuery) {
                    BoostQuery bq = (BoostQuery) q;
                    q = bq.getQuery();
                    originalBoost = bq.getBoost();
                }
                Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
                // Fields listed without an explicit boost keep their original clause untouched.
                q = ((fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery());
                newQ.add(q, clause.getOccur());
            }
            boostedMLTQuery = newQ.build();
        }
        // exclude current document from results
        BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
        realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
        realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
        return realMLTQuery.build();
    } catch (IOException e) {
        // Keep the underlying failure as the cause instead of printing it to stderr and dropping it.
        throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request", e);
    }
}
Also used : BooleanQuery(org.apache.lucene.search.BooleanQuery) Query(org.apache.lucene.search.Query) TermQuery(org.apache.lucene.search.TermQuery) BooleanQuery(org.apache.lucene.search.BooleanQuery) BoostQuery(org.apache.lucene.search.BoostQuery) HashMap(java.util.HashMap) BytesRefBuilder(org.apache.lucene.util.BytesRefBuilder) ArrayList(java.util.ArrayList) MoreLikeThis(org.apache.lucene.queries.mlt.MoreLikeThis) BoostQuery(org.apache.lucene.search.BoostQuery) SolrDocument(org.apache.solr.common.SolrDocument) SolrException(org.apache.solr.common.SolrException) TermQuery(org.apache.lucene.search.TermQuery) IOException(java.io.IOException) SchemaField(org.apache.solr.schema.SchemaField) IndexableField(org.apache.lucene.index.IndexableField) BooleanClause(org.apache.lucene.search.BooleanClause) Collection(java.util.Collection)

Example 97 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class StoredFieldsShardResponseProcessor method process.

/**
 * Copies the documents of the first shard response into {@code rb.retrievedDocuments},
 * keyed by the string form of their unique key, for every document that has an entry
 * in {@code rb.resultIds}.
 *
 * {@inheritDoc}
 */
@Override
public void process(ResponseBuilder rb, ShardRequest shardRequest) {
    final boolean wantScores = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0;
    final ShardResponse firstResponse = shardRequest.responses.get(0);
    final SolrDocumentList shardDocuments = (SolrDocumentList) firstResponse.getSolrResponse().getResponse().get("response");
    final String keyField = rb.req.getSchema().getUniqueKeyField().getName();
    for (SolrDocument current : shardDocuments) {
        Object key = current.getFieldValue(keyField).toString();
        ShardDoc known = rb.resultIds.get(key);
        FieldDoc scored = (FieldDoc) known;
        if (known == null) {
            // No entry in resultIds for this key: nothing to record.
            continue;
        }
        // Only attach a score when scores were requested and one is actually available.
        boolean hasScore = !Float.isNaN(scored.score);
        if (wantScores && hasScore) {
            current.setField("score", scored.score);
        }
        rb.retrievedDocuments.put(key, current);
    }
}
Also used : ShardResponse(org.apache.solr.handler.component.ShardResponse) SolrDocument(org.apache.solr.common.SolrDocument) FieldDoc(org.apache.lucene.search.FieldDoc) SolrDocumentList(org.apache.solr.common.SolrDocumentList) ShardDoc(org.apache.solr.handler.component.ShardDoc)

Example 98 with SolrDocument

use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.

the class CdcrVersionReplicationTest method doRealTimeGet.

/**
 * Queries the {@code /get} handler for the given ids and asserts that the returned
 * id-to-version mapping equals the expected one.
 *
 * @param ids comma-separated document ids to fetch
 * @param versions comma-separated expected versions, positionally matching {@code ids};
 *        when it splits to an empty list no document is expected
 */
void doRealTimeGet(String ids, String versions) throws Exception {
    List<String> idList = StrUtils.splitSmart(ids, ",", true);
    List<String> versionList = StrUtils.splitSmart(versions, ",", true);

    // Pair each id with the version found at the same position.
    Map<String, Object> expected = new HashMap<>();
    if (!versionList.isEmpty()) {
        int position = 0;
        for (String docId : idList) {
            expected.put(docId, Long.valueOf(versionList.get(position)));
            position++;
        }
    }

    QueryResponse response = solrServer.query(params("qt", "/get", "ids", ids));

    Map<String, Object> actual = new HashMap<>();
    for (SolrDocument fetched : response.getResults()) {
        String fetchedId = (String) fetched.get("id");
        actual.put(fetchedId, fetched.get(vfield));
    }
    assertEquals(expected, actual);
}
Also used : SolrDocument(org.apache.solr.common.SolrDocument) HashMap(java.util.HashMap) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse)

Example 99 with SolrDocument

use of org.apache.solr.common.SolrDocument in project stanbol by apache.

the class TopicClassificationEngine method updatePerformanceMetadata.

/**
 * Updates the performance statistics stored in the metadata entry of a topic. Committing
 * the change is the responsibility of the caller.
 *
 * @throws ClassifierException wrapping any exception raised while querying or updating Solr
 */
protected void updatePerformanceMetadata(String conceptId, float precision, float recall, int positiveSupport, int negativeSupport, List<String> falsePositiveExamples, List<String> falseNegativeExamples) throws ClassifierException {
    SolrServer solrServer = getActiveSolrServer();
    try {
        String entryTypeFilter = entryTypeField + ":" + METADATA_ENTRY;
        String conceptFilter = conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId);
        SolrQuery metadataQuery = new SolrQuery("*:*");
        metadataQuery.addFilterQuery(entryTypeFilter);
        metadataQuery.addFilterQuery(conceptFilter);
        // A single matching entry is expected; zero matches is tolerated.
        for (SolrDocument existing : solrServer.query(metadataQuery).getResults()) {
            // Start from the previously stored values (all metadata fields are assumed to be stored).
            Map<String, Collection<Object>> merged = new HashMap<String, Collection<Object>>();
            for (String name : existing.getFieldNames()) {
                merged.put(name, existing.getFieldValues(name));
            }
            addToList(merged, precisionField, precision);
            addToList(merged, recallField, recall);
            increment(merged, positiveSupportField, positiveSupport);
            increment(merged, negativeSupportField, negativeSupport);
            addToList(merged, falsePositivesField, falsePositiveExamples);
            addToList(merged, falseNegativesField, falseNegativeExamples);

            // Write the merged values back as a fresh input document.
            SolrInputDocument updated = new SolrInputDocument();
            for (Map.Entry<String, Collection<Object>> field : merged.entrySet()) {
                updated.addField(field.getKey(), field.getValue());
            }
            updated.setField(modelEvaluationDateField, UTCTimeStamper.nowUtcDate());
            solrServer.add(updated);
        }
        String summary = String.format("Performance for concept '%s': precision=%f, recall=%f," + " positiveSupport=%d, negativeSupport=%d", conceptId, precision, recall, positiveSupport, negativeSupport);
        log.info(summary);
    } catch (Exception e) {
        throw new ClassifierException(String.format("Error updating performance metadata for topic '%s' on Solr Core '%s'", conceptId, solrCoreId), e);
    }
}
Also used : HashMap(java.util.HashMap) EmbeddedSolrServer(org.apache.solr.client.solrj.embedded.EmbeddedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) EngineException(org.apache.stanbol.enhancer.servicesapi.EngineException) SolrServerException(org.apache.solr.client.solrj.SolrServerException) ConfigurationException(org.osgi.service.cm.ConfigurationException) InvalidSyntaxException(org.osgi.framework.InvalidSyntaxException) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException) InvalidContentException(org.apache.stanbol.enhancer.servicesapi.InvalidContentException) EntityhubException(org.apache.stanbol.entityhub.servicesapi.EntityhubException) ChainException(org.apache.stanbol.enhancer.servicesapi.ChainException) IOException(java.io.IOException) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SolrDocument(org.apache.solr.common.SolrDocument) Collection(java.util.Collection) Map(java.util.Map) HashMap(java.util.HashMap) ClassifierException(org.apache.stanbol.enhancer.topic.api.ClassifierException)

Example 100 with SolrDocument

use of org.apache.solr.common.SolrDocument in project stanbol by apache.

the class SolrTrainingSet method getExamples.

/**
 * Fetches one batch of training examples from the active Solr server, sorted by example id.
 *
 * <p>With an empty {@code topics} list all examples match. Otherwise, when {@code positive}
 * is true the examples must carry at least one of the topics (clauses joined with OR); when
 * false they must carry none of them (negated clauses joined with AND). A non-null
 * {@code offset} restricts the result to example ids in the range {@code [offset TO *]}.
 *
 * <p>The query asks for {@code batchSize + 1} rows: the extra row is not returned as an
 * example but its id is used as the offset of the next batch. Results without any value
 * for the example text field are skipped.
 *
 * @param topics topic URIs used to select the examples
 * @param offset lower bound (inclusive) on the example id, or {@code null} for no bound
 * @param positive whether to fetch examples carrying the topics or examples lacking them
 * @return the batch of examples, flagged as having more when a next example id was found
 * @throws TrainingSetException if the Solr query fails
 */
protected Batch<Example> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
    List<Example> items = new ArrayList<Example>();
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    StringBuilder q = new StringBuilder();
    if (topics.isEmpty()) {
        q.append("*:*");
    } else {
        // Positive clauses are OR-ed together; negated clauses are AND-ed.
        String prefix = positive ? "" : "-";
        String separator = positive ? " OR " : " AND ";
        List<String> parts = new ArrayList<String>(topics.size());
        for (String topic : topics) {
            parts.add(prefix + topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        String joined = StringUtils.join(parts, separator);
        if (positive && offset != null) {
            // Group the OR clauses so that the trailing AND applies to all of them.
            q.append("(").append(joined).append(")");
        } else {
            q.append(joined);
        }
    }
    if (offset != null) {
        q.append(" AND ").append(exampleIdField).append(":[").append(offset.toString()).append(" TO *]");
    }
    query.setQuery(q.toString());
    query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
    query.set("rows", batchSize + 1);
    String nextExampleId = null;
    try {
        int count = 0;
        QueryResponse response = solrServer.query(query);
        for (SolrDocument result : response.getResults()) {
            if (count == batchSize) {
                // The extra row only tells where the next batch starts.
                nextExampleId = result.getFirstValue(exampleIdField).toString();
            } else {
                count++;
                String exampleId = result.getFirstValue(exampleIdField).toString();
                Collection<Object> labelValues = result.getFieldValues(topicUrisField);
                Collection<Object> textValues = result.getFieldValues(exampleTextField);
                if (textValues == null) {
                    continue;
                }
                items.add(new Example(exampleId, labelValues, textValues));
            }
        }
    } catch (SolrServerException e) {
        // Report which kind of examples was requested; the message used to always say "positive".
        String msg = String.format("Error while fetching %s examples for topics ['%s'] on Solr Core '%s'.", positive ? "positive" : "negative", StringUtils.join(topics, "', '"), solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return new Batch<Example>(items, nextExampleId != null, nextExampleId);
}
Also used : SolrServerException(org.apache.solr.client.solrj.SolrServerException) ArrayList(java.util.ArrayList) ManagedSolrServer(org.apache.stanbol.commons.solr.managed.ManagedSolrServer) SolrServer(org.apache.solr.client.solrj.SolrServer) SolrQuery(org.apache.solr.client.solrj.SolrQuery) SolrDocument(org.apache.solr.common.SolrDocument) Batch(org.apache.stanbol.enhancer.topic.api.Batch) Example(org.apache.stanbol.enhancer.topic.api.training.Example) QueryResponse(org.apache.solr.client.solrj.response.QueryResponse) TrainingSetException(org.apache.stanbol.enhancer.topic.api.training.TrainingSetException)

Aggregations

SolrDocument (org.apache.solr.common.SolrDocument)227 SolrDocumentList (org.apache.solr.common.SolrDocumentList)92 QueryResponse (org.apache.solr.client.solrj.response.QueryResponse)78 ArrayList (java.util.ArrayList)49 Test (org.junit.Test)46 SolrQuery (org.apache.solr.client.solrj.SolrQuery)45 SolrParams (org.apache.solr.common.params.SolrParams)37 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)35 SolrServerException (org.apache.solr.client.solrj.SolrServerException)33 IOException (java.io.IOException)31 SolrInputDocument (org.apache.solr.common.SolrInputDocument)28 HashMap (java.util.HashMap)25 HttpSolrClient (org.apache.solr.client.solrj.impl.HttpSolrClient)24 NamedList (org.apache.solr.common.util.NamedList)21 Map (java.util.Map)19 List (java.util.List)18 SolrClient (org.apache.solr.client.solrj.SolrClient)13 ByteArrayInputStream (java.io.ByteArrayInputStream)12 UpdateRequest (org.apache.solr.client.solrj.request.UpdateRequest)12 SolrException (org.apache.solr.common.SolrException)12