use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.
the class CloudMLTQParser method parse.
/**
 * Builds a MoreLikeThis query for the document whose id is passed as the local-params value.
 *
 * <p>The source document is fetched with {@code getDocument(id)}. MoreLikeThis tuning options
 * ({@code mintf}, {@code mindf}, {@code minwl}, {@code maxwl}, {@code maxqt}, {@code maxntp},
 * {@code maxdf}, {@code boost}) are read from the local params. The similarity fields come from
 * the {@code qf} parameter (which may carry per-field boosts) or, when {@code qf} is absent, from
 * the document's fields that are stored and have an explicit analyzer.
 *
 * @return a boolean query that MUST match the (optionally field-boosted) MoreLikeThis query and
 *     MUST_NOT match the source document's unique key
 * @throws SolrException with {@code BAD_REQUEST} if the document cannot be fetched, if no
 *     similarity field is available, or if building the MoreLikeThis query raises an
 *     {@link IOException} (which is attached as the cause)
 */
public Query parse() {
  String id = localParams.get(QueryParsing.V);
  // Do a Real Time Get for the document
  SolrDocument doc = getDocument(id);
  if (doc == null) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "Error completing MLT request. Could not fetch " + "document with id [" + id + "]");
  }

  String[] qf = localParams.getParams("qf");
  Map<String, Float> boostFields = new HashMap<>();

  MoreLikeThis mlt = new MoreLikeThis(req.getSearcher().getIndexReader());
  mlt.setMinTermFreq(localParams.getInt("mintf", MoreLikeThis.DEFAULT_MIN_TERM_FREQ));
  mlt.setMinDocFreq(localParams.getInt("mindf", 0));
  mlt.setMinWordLen(localParams.getInt("minwl", MoreLikeThis.DEFAULT_MIN_WORD_LENGTH));
  mlt.setMaxWordLen(localParams.getInt("maxwl", MoreLikeThis.DEFAULT_MAX_WORD_LENGTH));
  mlt.setMaxQueryTerms(localParams.getInt("maxqt", MoreLikeThis.DEFAULT_MAX_QUERY_TERMS));
  mlt.setMaxNumTokensParsed(localParams.getInt("maxntp", MoreLikeThis.DEFAULT_MAX_NUM_TOKENS_PARSED));
  mlt.setMaxDocFreq(localParams.getInt("maxdf", MoreLikeThis.DEFAULT_MAX_DOC_FREQ));
  // Primitive boolean: avoids boxing the flag only to unbox it again below.
  boolean boost = localParams.getBool("boost", MoreLikeThis.DEFAULT_BOOST);
  mlt.setBoost(boost);
  mlt.setAnalyzer(req.getSchema().getIndexAnalyzer());

  Map<String, Collection<Object>> filteredDocument = new HashMap<>();
  String[] fieldNames;
  if (qf != null) {
    // Each qf value may itself hold several field specs; split and drop empty pieces.
    ArrayList<String> fields = new ArrayList<>();
    for (String fieldName : qf) {
      if (!StringUtils.isEmpty(fieldName)) {
        String[] strings = splitList.split(fieldName);
        for (String string : strings) {
          if (!StringUtils.isEmpty(string)) {
            fields.add(string);
          }
        }
      }
    }
    // Parse field names and boosts from the fields
    boostFields = SolrPluginUtils.parseFieldBoosts(fields.toArray(new String[0]));
    fieldNames = boostFields.keySet().toArray(new String[0]);
  } else {
    ArrayList<String> fields = new ArrayList<>();
    for (String field : doc.getFieldNames()) {
      // Only use fields that are stored and have an explicit analyzer.
      // This makes sense as the query uses tf/idf/.. for query construction.
      // We might want to relook and change this in the future though.
      SchemaField f = req.getSchema().getFieldOrNull(field);
      if (f != null && f.stored() && f.getType().isExplicitAnalyzer()) {
        fields.add(field);
      }
    }
    fieldNames = fields.toArray(new String[0]);
  }
  if (fieldNames.length < 1) {
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST,
        "MoreLikeThis requires at least one similarity field: qf");
  }
  mlt.setFieldNames(fieldNames);

  // Copy only the similarity fields, turning IndexableField values into their string form.
  for (String field : fieldNames) {
    Collection<Object> fieldValues = doc.getFieldValues(field);
    if (fieldValues != null) {
      Collection<Object> values = new ArrayList<>();
      for (Object val : fieldValues) {
        if (val instanceof IndexableField) {
          values.add(((IndexableField) val).stringValue());
        } else {
          values.add(val);
        }
      }
      filteredDocument.put(field, values);
    }
  }

  try {
    Query rawMLTQuery = mlt.like(filteredDocument);
    BooleanQuery boostedMLTQuery = (BooleanQuery) rawMLTQuery;
    if (boost && !boostFields.isEmpty()) {
      // Rebuild the query, multiplying each clause's boost by the boost configured for its field.
      BooleanQuery.Builder newQ = new BooleanQuery.Builder();
      newQ.setMinimumNumberShouldMatch(boostedMLTQuery.getMinimumNumberShouldMatch());
      for (BooleanClause clause : boostedMLTQuery) {
        Query q = clause.getQuery();
        float originalBoost = 1f;
        if (q instanceof BoostQuery) {
          BoostQuery bq = (BoostQuery) q;
          q = bq.getQuery();
          originalBoost = bq.getBoost();
        }
        Float fieldBoost = boostFields.get(((TermQuery) q).getTerm().field());
        // Without a configured field boost the clause's original query is kept untouched.
        q = (fieldBoost != null) ? new BoostQuery(q, fieldBoost * originalBoost) : clause.getQuery();
        newQ.add(q, clause.getOccur());
      }
      boostedMLTQuery = newQ.build();
    }
    // exclude current document from results
    BooleanQuery.Builder realMLTQuery = new BooleanQuery.Builder();
    realMLTQuery.add(boostedMLTQuery, BooleanClause.Occur.MUST);
    realMLTQuery.add(createIdQuery(req.getSchema().getUniqueKeyField().getName(), id), BooleanClause.Occur.MUST_NOT);
    return realMLTQuery.build();
  } catch (IOException e) {
    // Chain the cause so the failure reaches the caller/logs instead of being dumped to stderr.
    throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "Bad Request", e);
  }
}
use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.
the class StoredFieldsShardResponseProcessor method process.
/**
 * {@inheritDoc}
 *
 * <p>Reads the document list stored under {@code "response"} in the first shard response and,
 * for every document whose unique key is present in {@code rb.resultIds}, records it in
 * {@code rb.retrievedDocuments}. When scores were requested and the merged score is not NaN,
 * the document's {@code "score"} field is set to that score first.
 */
@Override
public void process(ResponseBuilder rb, ShardRequest shardRequest) {
  final boolean wantScores = (rb.getFieldFlags() & SolrIndexSearcher.GET_SCORES) != 0;
  final ShardResponse firstResponse = shardRequest.responses.get(0);
  final SolrDocumentList fetched =
      (SolrDocumentList) firstResponse.getSolrResponse().getResponse().get("response");
  final String keyField = rb.req.getSchema().getUniqueKeyField().getName();

  for (SolrDocument document : fetched) {
    // Result ids are looked up by the string form of the unique key value.
    Object key = document.getFieldValue(keyField).toString();
    ShardDoc merged = rb.resultIds.get(key);
    if (merged == null) {
      // Not part of the merged result set: nothing to record.
      continue;
    }
    FieldDoc scored = (FieldDoc) merged;
    if (wantScores && !Float.isNaN(scored.score)) {
      document.setField("score", scored.score);
    }
    rb.retrievedDocuments.put(key, document);
  }
}
use of org.apache.solr.common.SolrDocument in project lucene-solr by apache.
the class CdcrVersionReplicationTest method doRealTimeGet.
/**
 * Issues a {@code /get} request for the given ids and asserts that the returned id-to-version
 * mapping equals the expected one.
 *
 * @param ids comma-separated document ids to fetch
 * @param versions comma-separated expected versions, positionally matching {@code ids}; when it
 *     yields no tokens, no documents are expected
 */
void doRealTimeGet(String ids, String versions) throws Exception {
  Map<String, Object> expected = new HashMap<>();
  List<String> idTokens = StrUtils.splitSmart(ids, ",", true);
  List<String> versionTokens = StrUtils.splitSmart(versions, ",", true);
  // With no versions supplied the expectation stays empty.
  if (!versionTokens.isEmpty()) {
    for (int pos = 0; pos < idTokens.size(); pos++) {
      String docId = idTokens.get(pos);
      Long version = Long.valueOf(versionTokens.get(pos));
      expected.put(docId, version);
    }
  }

  QueryResponse response = solrServer.query(params("qt", "/get", "ids", ids));

  Map<String, Object> actual = new HashMap<>();
  for (SolrDocument found : response.getResults()) {
    String docId = (String) found.get("id");
    actual.put(docId, found.get(vfield));
  }

  assertEquals(expected, actual);
}
use of org.apache.solr.common.SolrDocument in project stanbol by apache.
the class TopicClassificationEngine method updatePerformanceMetadata.
/**
 * Updates the performance statistics stored in the metadata entry of a topic.
 *
 * <p>For every metadata entry matching the concept, the existing field values are copied, the new
 * precision/recall values and example lists are appended, the support counters are incremented,
 * the evaluation date is set, and the resulting document is added to the Solr server. Committing
 * is the responsibility of the caller.
 *
 * @throws ClassifierException wrapping any exception raised while querying or updating the index
 */
protected void updatePerformanceMetadata(String conceptId, float precision, float recall, int positiveSupport, int negativeSupport, List<String> falsePositiveExamples, List<String> falseNegativeExamples) throws ClassifierException {
    SolrServer solrServer = getActiveSolrServer();
    try {
        SolrQuery metadataQuery = new SolrQuery("*:*");
        metadataQuery.addFilterQuery(entryTypeField + ":" + METADATA_ENTRY);
        metadataQuery.addFilterQuery(conceptUriField + ":" + ClientUtils.escapeQueryChars(conceptId));
        for (SolrDocument existing : solrServer.query(metadataQuery).getResults()) {
            // There should be only one match (or none, which is tolerated).
            // Fetch any old values to update (all metadata fields are assumed to be stored).
            Map<String, Collection<Object>> merged = new HashMap<String, Collection<Object>>();
            for (String name : existing.getFieldNames()) {
                merged.put(name, existing.getFieldValues(name));
            }

            addToList(merged, precisionField, precision);
            addToList(merged, recallField, recall);
            increment(merged, positiveSupportField, positiveSupport);
            increment(merged, negativeSupportField, negativeSupport);
            addToList(merged, falsePositivesField, falsePositiveExamples);
            addToList(merged, falseNegativesField, falseNegativeExamples);

            // Rebuild the entry from the merged values and stamp it with the evaluation date.
            SolrInputDocument updated = new SolrInputDocument();
            for (String name : merged.keySet()) {
                updated.addField(name, merged.get(name));
            }
            updated.setField(modelEvaluationDateField, UTCTimeStamper.nowUtcDate());
            solrServer.add(updated);
        }
        String summary = String.format(
            "Performance for concept '%s': precision=%f, recall=%f,"
                + " positiveSupport=%d, negativeSupport=%d",
            conceptId, precision, recall, positiveSupport, negativeSupport);
        log.info(summary);
    } catch (Exception e) {
        throw new ClassifierException(
            String.format("Error updating performance metadata for topic '%s' on Solr Core '%s'", conceptId, solrCoreId),
            e);
    }
}
use of org.apache.solr.common.SolrDocument in project stanbol by apache.
the class SolrTrainingSet method getExamples.
/**
 * Fetches one batch of training examples from the Solr index.
 *
 * <p>The query selects all examples when {@code topics} is empty, examples tagged with any of the
 * topics when {@code positive} is true, and examples tagged with none of them otherwise. Results
 * are sorted by the example id field in ascending order and {@code batchSize + 1} rows are
 * requested: the id of the extra row, if present, is reported as the offset of the next batch.
 *
 * @param topics topic URIs to select (or exclude); an empty list selects everything
 * @param offset lower bound (inclusive) on the example id, or {@code null} to start from the
 *     beginning
 * @param positive whether to fetch examples matching the topics ({@code true}) or examples not
 *     matching any of them ({@code false})
 * @return the batch of examples, together with a flag and id indicating whether more remain
 * @throws TrainingSetException if the Solr query fails
 */
protected Batch<Example> getExamples(List<String> topics, Object offset, boolean positive) throws TrainingSetException {
    List<Example> items = new ArrayList<Example>();
    SolrServer solrServer = getActiveSolrServer();
    SolrQuery query = new SolrQuery();
    List<String> parts = new ArrayList<String>();
    StringBuilder q = new StringBuilder();
    if (topics.isEmpty()) {
        q.append("*:*");
    } else if (positive) {
        for (String topic : topics) {
            parts.add(topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        // Parenthesize the OR group only when it is combined with the offset clause below.
        if (offset != null) {
            q.append("(");
        }
        q.append(StringUtils.join(parts, " OR "));
        if (offset != null) {
            q.append(")");
        }
    } else {
        for (String topic : topics) {
            parts.add("-" + topicUrisField + ":" + ClientUtils.escapeQueryChars(topic));
        }
        q.append(StringUtils.join(parts, " AND "));
    }
    if (offset != null) {
        q.append(" AND ").append(exampleIdField).append(":[").append(offset).append(" TO *]");
    }
    query.setQuery(q.toString());
    query.addSortField(exampleIdField, SolrQuery.ORDER.asc);
    // One extra row is requested to learn whether (and where) a next batch starts.
    query.set("rows", batchSize + 1);
    String nextExampleId = null;
    try {
        int count = 0;
        QueryResponse response = solrServer.query(query);
        for (SolrDocument result : response.getResults()) {
            if (count == batchSize) {
                nextExampleId = result.getFirstValue(exampleIdField).toString();
            } else {
                // Counted before the text check: documents without text still use up a batch slot.
                count++;
                String exampleId = result.getFirstValue(exampleIdField).toString();
                Collection<Object> labelValues = result.getFieldValues(topicUrisField);
                Collection<Object> textValues = result.getFieldValues(exampleTextField);
                if (textValues == null) {
                    continue;
                }
                items.add(new Example(exampleId, labelValues, textValues));
            }
        }
    } catch (SolrServerException e) {
        // Report the kind of examples actually requested rather than always saying "positive".
        String msg = String.format("Error while fetching %s examples for topics ['%s'] on Solr Core '%s'.", positive ? "positive" : "negative", StringUtils.join(topics, "', '"), solrCoreId);
        throw new TrainingSetException(msg, e);
    }
    return new Batch<Example>(items, nextExampleId != null, nextExampleId);
}
Aggregations