Use of org.apache.solr.search.DocListAndSet in project lucene-solr by apache.
From the class ClusteringComponent, method process:
@Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
final String name = getClusteringEngineName(rb);
boolean useResults = params.getBool(ClusteringParams.USE_SEARCH_RESULTS, false);
if (useResults) {
SearchClusteringEngine engine = searchClusteringEngines.get(name);
if (engine != null) {
checkAvailable(name, engine);
DocListAndSet results = rb.getResults();
Map<SolrDocument, Integer> docIds = new HashMap<>(results.docList.size());
SolrDocumentList solrDocList = docListToSolrDocumentList(results.docList, rb.req.getSearcher(), engine.getFieldsToLoad(rb.req), docIds);
Object clusters = engine.cluster(rb.getQuery(), solrDocList, docIds, rb.req);
rb.rsp.add("clusters", clusters);
} else {
log.warn("No engine named: " + name);
}
}
boolean useCollection = params.getBool(ClusteringParams.USE_COLLECTION, false);
if (useCollection) {
DocumentClusteringEngine engine = documentClusteringEngines.get(name);
if (engine != null) {
checkAvailable(name, engine);
boolean useDocSet = params.getBool(ClusteringParams.USE_DOC_SET, false);
NamedList<?> nl = null;
// TODO: This likely needs to be made into a background task that runs in an executor
if (useDocSet) {
nl = engine.cluster(rb.getResults().docSet, params);
} else {
nl = engine.cluster(params);
}
rb.rsp.add("clusters", nl);
} else {
log.warn("No engine named: " + name);
}
}
}
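For context, a minimal SolrJ sketch of how a client might reach the search-results branch above. The URL and core name are assumptions; the literal parameter strings mirror what COMPONENT_NAME ("clustering") and ClusteringParams.USE_SEARCH_RESULTS ("clustering.results") resolve to, and the clustering component must be registered on the request handler for them to take effect.
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
public class ClusteringRequestSketch {
  public static void main(String[] args) throws Exception {
    // Assumed local core; adjust the URL and collection to your setup.
    try (SolrClient client = new HttpSolrClient.Builder(
        "http://localhost:8983/solr/techproducts").build()) {
      SolrQuery query = new SolrQuery("memory");
      query.set("clustering", true);         // COMPONENT_NAME
      query.set("clustering.results", true); // ClusteringParams.USE_SEARCH_RESULTS
      QueryResponse rsp = client.query(query);
      // The component adds its output under the "clusters" key.
      System.out.println(rsp.getResponse().get("clusters"));
    }
  }
}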
Use of org.apache.solr.search.DocListAndSet in project lucene-solr by apache.
From the class MoreLikeThisComponent, method getMoreLikeThese:
NamedList<DocList> getMoreLikeThese(ResponseBuilder rb, SolrIndexSearcher searcher, DocList docs, int flags) throws IOException {
SolrParams p = rb.req.getParams();
IndexSchema schema = searcher.getSchema();
MoreLikeThisHandler.MoreLikeThisHelper mltHelper = new MoreLikeThisHandler.MoreLikeThisHelper(p, searcher);
NamedList<DocList> mlt = new SimpleOrderedMap<>();
DocIterator iterator = docs.iterator();
SimpleOrderedMap<Object> dbg = null;
if (rb.isDebug()) {
dbg = new SimpleOrderedMap<>();
}
while (iterator.hasNext()) {
int id = iterator.nextDoc();
int rows = p.getInt(MoreLikeThisParams.DOC_COUNT, 5);
DocListAndSet sim = mltHelper.getMoreLikeThis(id, 0, rows, null, null, flags);
String name = schema.printableUniqueKey(searcher.doc(id));
mlt.add(name, sim.docList);
if (dbg != null) {
SimpleOrderedMap<Object> docDbg = new SimpleOrderedMap<>();
docDbg.add("rawMLTQuery", mltHelper.getRawMLTQuery().toString());
docDbg.add("boostedMLTQuery", mltHelper.getBoostedMLTQuery().toString());
docDbg.add("realMLTQuery", mltHelper.getRealMLTQuery().toString());
SimpleOrderedMap<Object> explains = new SimpleOrderedMap<>();
DocIterator mltIte = sim.docList.iterator();
while (mltIte.hasNext()) {
int mltid = mltIte.nextDoc();
String key = schema.printableUniqueKey(searcher.doc(mltid));
explains.add(key, searcher.explain(mltHelper.getRealMLTQuery(), mltid));
}
docDbg.add("explain", explains);
dbg.add(name, docDbg);
}
}
// add debug information
if (dbg != null) {
rb.addDebugInfo("moreLikeThis", dbg);
}
return mlt;
}
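A hedged sketch of consuming the structure returned above: each NamedList key is a document's printable unique key and each value is the DocList pulled out of the per-document DocListAndSet. NamedList&lt;T&gt; implements Iterable&lt;Map.Entry&lt;String, T&gt;&gt;, so a for-each works directly; printSimilar is a hypothetical helper, not part of the component.
import java.util.Map;
import org.apache.solr.common.util.NamedList;
import org.apache.solr.search.DocIterator;
import org.apache.solr.search.DocList;
static void printSimilar(NamedList<DocList> similar) {
  for (Map.Entry<String, DocList> entry : similar) {
    DocIterator it = entry.getValue().iterator();
    while (it.hasNext()) {
      // nextDoc() yields internal Lucene ids, as in the snippet above.
      System.out.println(entry.getKey() + " -> doc " + it.nextDoc());
    }
  }
}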
Use of org.apache.solr.search.DocListAndSet in project Solbase by Photobucket.
From the class SolbaseQueryComponent, method process:
/**
* Actually run the query
*/
@Override
public void process(ResponseBuilder rb) throws IOException {
SolrQueryRequest req = rb.req;
SolrQueryResponse rsp = rb.rsp;
SolrParams params = req.getParams();
if (!params.getBool(COMPONENT_NAME, true)) {
return;
}
SolrIndexSearcher searcher = req.getSearcher();
if (rb.getQueryCommand().getOffset() < 0) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "'start' parameter cannot be negative");
}
// -1 as flag if not set.
long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1);
// Optional: This could also be implemented by the top-level searcher sending
// a filter that lists the ids... that would be transparent to the request
// handler, but would be more expensive (and would preserve score too if desired).
String ids = params.get(ShardParams.IDS);
if (ids != null) {
List<String> idArr = StrUtils.splitSmart(ids, ",", true);
int[] luceneIds = new int[idArr.size()];
int docs = 0;
for (int i = 0; i < idArr.size(); i++) {
luceneIds[docs++] = Integer.parseInt(idArr.get(i));
}
// we are indexing docId as the Solr uniq_id; by doing this, we are bound to
// Integer.MAX_VALUE ~= 2 billion documents ("docs" is the number of docs)
DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, docs, luceneIds, null, docs, 0, null);
if (rb.isNeedDocSet()) {
List<Query> queries = new ArrayList<Query>();
queries.add(rb.getQuery());
List<Query> filters = rb.getFilters();
if (filters != null)
queries.addAll(filters);
res.docSet = searcher.getDocSet(queries);
}
rb.setResults(res);
rsp.add("response", rb.getResults().docList);
return;
}
SolrIndexSearcher.QueryCommand cmd = rb.getQueryCommand();
cmd.setTimeAllowed(timeAllowed);
SolrIndexSearcher.QueryResult result = new SolrIndexSearcher.QueryResult();
searcher.search(result, cmd);
rb.setResult(result);
rsp.add("response", rb.getResults().docList);
rsp.getToLog().add("hits", rb.getResults().docList.matches());
// The query cache doesn't currently store sort field values, and SolrIndexSearcher
// doesn't currently have an option to return sort field values. Because of this,
// we take the documents given and re-derive the sort values.
boolean fsv = req.getParams().getBool(ResponseBuilder.FIELD_SORT_VALUES, false);
if (fsv) {
Sort sort = rb.getSortSpec().getSort();
SortField[] sortFields = sort == null ? new SortField[] { SortField.FIELD_SCORE } : sort.getSort();
// order is important for the sort fields
NamedList sortVals = new NamedList();
// a dummy Field
Field field = new Field("dummy", "", Field.Store.YES, Field.Index.NO);
SolrIndexReader reader = searcher.getReader();
SolrIndexReader[] readers = reader.getLeafReaders();
SolrIndexReader subReader = reader;
if (readers.length == 1) {
// if there is a single segment, use that subReader and avoid looking up each time
subReader = readers[0];
readers = null;
}
int[] offsets = reader.getLeafOffsets();
//TODO: need to fetch sort value from collector instead of re-derive lookup from id
for (SortField sortField : sortFields) {
int type = sortField.getType();
if (type == SortField.SCORE || type == SortField.DOC)
continue;
FieldComparator comparator = null;
FieldComparator[] comparators = (readers == null) ? null : new FieldComparator[readers.length];
String fieldname = sortField.getField();
FieldType ft = fieldname == null ? null : req.getSchema().getFieldTypeNoEx(fieldname);
DocSlice docList = (DocSlice) rb.getResults().docList;
ArrayList<Object> vals = new ArrayList<Object>(docList.size());
for (int i = docList.offset; i < docList.len; i++) {
vals.add(Integer.valueOf(docList.sorts[i][((EmbeddedSortField) sortField).getFieldNumber() - 1]));
}
sortVals.add(fieldname, vals);
}
rsp.add("sort_values", sortVals);
}
// pre-fetch returned documents
if (!req.getParams().getBool(ShardParams.IS_SHARD, false) && rb.getResults().docList != null && rb.getResults().docList.size() <= 50) {
// TODO: this may depend on the highlighter component (or other components?)
SolrPluginUtils.optimizePreFetchDocs(rb.getResults().docList, rb.getQuery(), req, rsp);
}
}
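The DocSlice built in the ids branch above uses Solbase's extended constructor (the extra trailing argument carries the per-document sort values that the sort_values loop later reads from docList.sorts). Against stock Solr of this era, where the signature is DocSlice(offset, len, docs, scores, matches, maxScore), the same wrapping pattern looks like this sketch; the ids are made up.
import org.apache.solr.search.DocListAndSet;
import org.apache.solr.search.DocSlice;
// Wrap three hypothetical internal Lucene doc ids in a DocListAndSet,
// with no scores (null) and a maxScore of 0.
int[] luceneIds = { 3, 17, 42 };
DocListAndSet res = new DocListAndSet();
res.docList = new DocSlice(0, luceneIds.length, luceneIds, null, luceneIds.length, 0.0f);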
Use of org.apache.solr.search.DocListAndSet in project lucene-solr by apache.
From the class MoreLikeThisHandler, method handleRequestBody:
@Override
public void handleRequestBody(SolrQueryRequest req, SolrQueryResponse rsp) throws Exception {
SolrParams params = req.getParams();
long timeAllowed = (long) params.getInt(CommonParams.TIME_ALLOWED, -1);
if (timeAllowed > 0) {
SolrQueryTimeoutImpl.set(timeAllowed);
}
try {
// Set field flags
ReturnFields returnFields = new SolrReturnFields(req);
rsp.setReturnFields(returnFields);
int flags = 0;
if (returnFields.wantsScore()) {
flags |= SolrIndexSearcher.GET_SCORES;
}
String defType = params.get(QueryParsing.DEFTYPE, QParserPlugin.DEFAULT_QTYPE);
String q = params.get(CommonParams.Q);
Query query = null;
SortSpec sortSpec = null;
List<Query> filters = null;
try {
if (q != null) {
QParser parser = QParser.getParser(q, defType, req);
query = parser.getQuery();
sortSpec = parser.getSortSpec(true);
}
String[] fqs = req.getParams().getParams(CommonParams.FQ);
if (fqs != null && fqs.length != 0) {
filters = new ArrayList<>();
for (String fq : fqs) {
if (fq != null && fq.trim().length() != 0) {
QParser fqp = QParser.getParser(fq, req);
filters.add(fqp.getQuery());
}
}
}
} catch (SyntaxError e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
SolrIndexSearcher searcher = req.getSearcher();
MoreLikeThisHelper mlt = new MoreLikeThisHelper(params, searcher);
// Hold on to the interesting terms if relevant
TermStyle termStyle = TermStyle.get(params.get(MoreLikeThisParams.INTERESTING_TERMS));
List<InterestingTerm> interesting = (termStyle == TermStyle.NONE) ? null : new ArrayList<>(mlt.mlt.getMaxQueryTerms());
DocListAndSet mltDocs = null;
// Parse Required Params
// This will either have a single Reader or valid query
Reader reader = null;
try {
if (q == null || q.trim().length() < 1) {
Iterable<ContentStream> streams = req.getContentStreams();
if (streams != null) {
Iterator<ContentStream> iter = streams.iterator();
if (iter.hasNext()) {
reader = iter.next().getReader();
}
if (iter.hasNext()) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis does not support multiple ContentStreams");
}
}
}
int start = params.getInt(CommonParams.START, CommonParams.START_DEFAULT);
int rows = params.getInt(CommonParams.ROWS, CommonParams.ROWS_DEFAULT);
// --------------------------------------------------------------------------------
if (reader != null) {
mltDocs = mlt.getMoreLikeThis(reader, start, rows, filters, interesting, flags);
} else if (q != null) {
// Matching options
boolean includeMatch = params.getBool(MoreLikeThisParams.MATCH_INCLUDE, true);
int matchOffset = params.getInt(MoreLikeThisParams.MATCH_OFFSET, 0);
// Find the base match
// only get the first one...
DocList match = searcher.getDocList(query, null, null, matchOffset, 1, flags);
if (includeMatch) {
rsp.add("match", match);
}
// This is an iterator, but we only handle the first match
DocIterator iterator = match.iterator();
if (iterator.hasNext()) {
// do a MoreLikeThis query for each document in results
int id = iterator.nextDoc();
mltDocs = mlt.getMoreLikeThis(id, start, rows, filters, interesting, flags);
}
} else {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "MoreLikeThis requires either a query (?q=) or text to find similar documents.");
}
} finally {
if (reader != null) {
reader.close();
}
}
if (mltDocs == null) {
// avoid NPE
mltDocs = new DocListAndSet();
}
rsp.addResponse(mltDocs.docList);
if (interesting != null) {
if (termStyle == TermStyle.DETAILS) {
NamedList<Float> it = new NamedList<>();
for (InterestingTerm t : interesting) {
it.add(t.term.toString(), t.boost);
}
rsp.add("interestingTerms", it);
} else {
List<String> it = new ArrayList<>(interesting.size());
for (InterestingTerm t : interesting) {
it.add(t.term.text());
}
rsp.add("interestingTerms", it);
}
}
// maybe facet the results
if (params.getBool(FacetParams.FACET, false)) {
if (mltDocs.docSet == null) {
rsp.add("facet_counts", null);
} else {
SimpleFacets f = new SimpleFacets(req, mltDocs.docSet, params);
rsp.add("facet_counts", FacetComponent.getFacetCounts(f));
}
}
boolean dbg = req.getParams().getBool(CommonParams.DEBUG_QUERY, false);
boolean dbgQuery = false, dbgResults = false;
if (!dbg) {
//if it's true, we are doing everything anyway.
String[] dbgParams = req.getParams().getParams(CommonParams.DEBUG);
if (dbgParams != null) {
for (String dbgParam : dbgParams) {
if (dbgParam.equals(CommonParams.QUERY)) {
dbgQuery = true;
} else if (dbgParam.equals(CommonParams.RESULTS)) {
dbgResults = true;
}
}
}
} else {
dbgQuery = true;
dbgResults = true;
}
// Copied from StandardRequestHandler... perhaps it should be added to doStandardDebug?
if (dbg) {
try {
NamedList<Object> dbgInfo = SolrPluginUtils.doStandardDebug(req, q, mlt.getRawMLTQuery(), mltDocs.docList, dbgQuery, dbgResults);
if (null != dbgInfo) {
if (null != filters) {
dbgInfo.add("filter_queries", req.getParams().getParams(CommonParams.FQ));
List<String> fqs = new ArrayList<>(filters.size());
for (Query fq : filters) {
fqs.add(QueryParsing.toString(fq, req.getSchema()));
}
dbgInfo.add("parsed_filter_queries", fqs);
}
rsp.add("debug", dbgInfo);
}
} catch (Exception e) {
SolrException.log(log, "Exception during debug", e);
rsp.add("exception_during_debug", SolrException.toStr(e));
}
}
} catch (ExitableDirectoryReader.ExitingReaderException ex) {
log.warn("Query: " + req.getParamString() + "; " + ex.getMessage());
} finally {
SolrQueryTimeoutImpl.reset();
}
}
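A hedged SolrJ sketch of driving the handler above. The handler path (/mlt), base URL, and field list are assumptions about local configuration; the parameter constants come from MoreLikeThisParams ("mlt.fl" and "mlt.interestingTerms").
import org.apache.solr.client.solrj.SolrClient;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.impl.HttpSolrClient;
import org.apache.solr.client.solrj.response.QueryResponse;
import org.apache.solr.common.params.MoreLikeThisParams;
public class MltRequestSketch {
  public static void main(String[] args) throws Exception {
    try (SolrClient client = new HttpSolrClient.Builder(
        "http://localhost:8983/solr/techproducts").build()) {
      SolrQuery query = new SolrQuery("id:SP2514N"); // query selecting the base match
      query.setRequestHandler("/mlt");               // assumed handler registration
      query.set(MoreLikeThisParams.SIMILARITY_FIELDS, "name,features");
      query.set(MoreLikeThisParams.INTERESTING_TERMS, "details");
      QueryResponse rsp = client.query(query);
      System.out.println(rsp.getResponse().get("interestingTerms"));
    }
  }
}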
Use of org.apache.solr.search.DocListAndSet in project lucene-solr by apache.
From the class TermVectorComponent (which defines the FieldOptions inner class), method process:
@Override
public void process(ResponseBuilder rb) throws IOException {
SolrParams params = rb.req.getParams();
if (!params.getBool(COMPONENT_NAME, false)) {
return;
}
NamedList<Object> termVectors = new NamedList<>();
rb.rsp.add(TERM_VECTORS, termVectors);
IndexSchema schema = rb.req.getSchema();
SchemaField keyField = schema.getUniqueKeyField();
String uniqFieldName = null;
if (keyField != null) {
uniqFieldName = keyField.getName();
}
FieldOptions allFields = new FieldOptions();
//figure out what options we have, and try to get the appropriate vector
allFields.termFreq = params.getBool(TermVectorParams.TF, false);
allFields.positions = params.getBool(TermVectorParams.POSITIONS, false);
allFields.offsets = params.getBool(TermVectorParams.OFFSETS, false);
allFields.payloads = params.getBool(TermVectorParams.PAYLOADS, false);
allFields.docFreq = params.getBool(TermVectorParams.DF, false);
allFields.tfIdf = params.getBool(TermVectorParams.TF_IDF, false);
//short cut to all values.
if (params.getBool(TermVectorParams.ALL, false)) {
allFields.termFreq = true;
allFields.positions = true;
allFields.offsets = true;
allFields.payloads = true;
allFields.docFreq = true;
allFields.tfIdf = true;
}
//Build up our per field mapping
Map<String, FieldOptions> fieldOptions = new HashMap<>();
NamedList<List<String>> warnings = new NamedList<>();
List<String> noTV = new ArrayList<>();
List<String> noPos = new ArrayList<>();
List<String> noOff = new ArrayList<>();
List<String> noPay = new ArrayList<>();
Set<String> fields = getFields(rb);
if (null != fields) {
//we have specific fields to retrieve, or no fields
for (String field : fields) {
// workaround SOLR-3523
if (null == field || "score".equals(field))
continue;
// we don't want to issue warnings about the uniqueKey field
// since it can cause lots of confusion in distributed requests
// where the uniqueKey field is injected into the fl for merging
final boolean fieldIsUniqueKey = field.equals(uniqFieldName);
SchemaField sf = schema.getFieldOrNull(field);
if (sf != null) {
if (sf.storeTermVector()) {
FieldOptions option = fieldOptions.get(field);
if (option == null) {
option = new FieldOptions();
option.fieldName = field;
fieldOptions.put(field, option);
}
//get the per field mappings
option.termFreq = params.getFieldBool(field, TermVectorParams.TF, allFields.termFreq);
option.docFreq = params.getFieldBool(field, TermVectorParams.DF, allFields.docFreq);
option.tfIdf = params.getFieldBool(field, TermVectorParams.TF_IDF, allFields.tfIdf);
//Validate these are even an option
option.positions = params.getFieldBool(field, TermVectorParams.POSITIONS, allFields.positions);
if (option.positions && !sf.storeTermPositions() && !fieldIsUniqueKey) {
noPos.add(field);
}
option.offsets = params.getFieldBool(field, TermVectorParams.OFFSETS, allFields.offsets);
if (option.offsets && !sf.storeTermOffsets() && !fieldIsUniqueKey) {
noOff.add(field);
}
option.payloads = params.getFieldBool(field, TermVectorParams.PAYLOADS, allFields.payloads);
if (option.payloads && !sf.storeTermPayloads() && !fieldIsUniqueKey) {
noPay.add(field);
}
} else {
//field doesn't have term vectors
if (!fieldIsUniqueKey)
noTV.add(field);
}
} else {
//field doesn't exist
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, "undefined field: " + field);
}
}
}
// NOTE: currently all types of warnings are schema driven and guaranteed to be
// consistent across all shards; if warning types that may be inconsistent across
// shards are ever added, finishStage() needs to be changed to account for that.
if (!noTV.isEmpty()) {
warnings.add("noTermVectors", noTV);
}
if (!noPos.isEmpty()) {
warnings.add("noPositions", noPos);
}
if (!noOff.isEmpty()) {
warnings.add("noOffsets", noOff);
}
if (!noPay.isEmpty()) {
warnings.add("noPayloads", noPay);
}
if (warnings.size() > 0) {
termVectors.add(TV_KEY_WARNINGS, warnings);
}
DocListAndSet listAndSet = rb.getResults();
List<Integer> docIds = getInts(params.getParams(TermVectorParams.DOC_IDS));
Iterator<Integer> iter;
if (docIds != null && !docIds.isEmpty()) {
iter = docIds.iterator();
} else {
DocList list = listAndSet.docList;
iter = list.iterator();
}
SolrIndexSearcher searcher = rb.req.getSearcher();
IndexReader reader = searcher.getIndexReader();
//the TVMapper is a TermVectorMapper which can be used to optimize loading of Term Vectors
//Only load the id field to get the uniqueKey of that field
final String finalUniqFieldName = uniqFieldName;
final List<String> uniqValues = new ArrayList<>();
// TODO: is this required to be single-valued? if so, we should STOP
// once we find it...
final StoredFieldVisitor getUniqValue = new StoredFieldVisitor() {
@Override
public void stringField(FieldInfo fieldInfo, byte[] bytes) {
uniqValues.add(new String(bytes, StandardCharsets.UTF_8));
}
@Override
public void intField(FieldInfo fieldInfo, int value) {
uniqValues.add(Integer.toString(value));
}
@Override
public void longField(FieldInfo fieldInfo, long value) {
uniqValues.add(Long.toString(value));
}
@Override
public Status needsField(FieldInfo fieldInfo) {
return (fieldInfo.name.equals(finalUniqFieldName)) ? Status.YES : Status.NO;
}
};
while (iter.hasNext()) {
Integer docId = iter.next();
NamedList<Object> docNL = new NamedList<>();
if (keyField != null) {
reader.document(docId, getUniqValue);
String uniqVal = null;
if (uniqValues.size() != 0) {
uniqVal = uniqValues.get(0);
uniqValues.clear();
docNL.add("uniqueKey", uniqVal);
termVectors.add(uniqVal, docNL);
}
} else {
// support for schemas w/o a unique key,
termVectors.add("doc-" + docId, docNL);
}
if (null != fields) {
for (Map.Entry<String, FieldOptions> entry : fieldOptions.entrySet()) {
final String field = entry.getKey();
final Terms vector = reader.getTermVector(docId, field);
if (vector != null) {
TermsEnum termsEnum = vector.iterator();
mapOneVector(docNL, entry.getValue(), reader, docId, termsEnum, field);
}
}
} else {
// extract all fields
final Fields vectors = reader.getTermVectors(docId);
for (String field : vectors) {
Terms terms = vectors.terms(field);
if (terms != null) {
TermsEnum termsEnum = terms.iterator();
mapOneVector(docNL, allFields, reader, docId, termsEnum, field);
}
}
}
}
}
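For reference, a hedged sketch of the request parameters that steer the branches above. The component name "tv" and the TermVectorParams constants ("tv.tf", "tv.df", "tv.fl") match this version of Solr; the field name and the assumption that the component is wired into the handler's component chain are mine.
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.common.params.TermVectorParams;
// Ask the term-vector component for term and document frequencies on an
// assumed "includes" field; tv.fl restricts which stored term vectors are loaded.
SolrQuery query = new SolrQuery("*:*");
query.set("tv", true);                          // TermVectorComponent.COMPONENT_NAME
query.set(TermVectorParams.TF, true);           // per-term frequency
query.set(TermVectorParams.DF, true);           // document frequency
query.set(TermVectorParams.FIELDS, "includes"); // tv.fl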