use of org.apache.solr.search.DocList in project lucene-solr by apache.
the class CarrotClusteringEngine method getDocuments.
/**
* Prepares Carrot2 documents for clustering.
*/
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
SolrHighlighter highlighter = null;
SolrParams solrParams = sreq.getParams();
SolrCore core = sreq.getCore();
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
// Maps Solr field names to Carrot2 custom field names
Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map
Map<String, String> languageCodeMap = new HashMap<>();
if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
languageCodeMap.put(split[0], split[1]);
} else {
log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
}
}
}
// Get the documents
boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null) {
Map<String, Object> args = new HashMap<>();
snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
//we don't care about actually highlighting the area
args.put(HighlightParams.SIMPLE_PRE, "");
args.put(HighlightParams.SIMPLE_POST, "");
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
return sreq.getSearcher();
}
};
} else {
log.warn("No highlighter configured, cannot produce summary");
produceSummary = false;
}
}
Iterator<SolrDocument> docsIter = solrDocList.iterator();
List<Document> result = new ArrayList<>(solrDocList.size());
float[] scores = { 1.0f };
int[] docsHolder = new int[1];
Query theQuery = query;
while (docsIter.hasNext()) {
SolrDocument sdoc = docsIter.next();
String snippet = null;
// See comment in ClusteringComponent#finishStage().
if (produceSummary && docIds != null) {
docsHolder[0] = docIds.get(sdoc).intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
if (highlights != null && highlights.size() == 1) {
// should only be one value given our setup
// should only be one document
@SuppressWarnings("unchecked") NamedList<String[]> tmp = (NamedList<String[]>) highlights.getVal(0);
final StringBuilder sb = new StringBuilder();
for (int j = 0; j < snippetFieldAry.length; j++) {
// Join fragments with a period, so that Carrot2 does not create
// cross-fragment phrases, such phrases rarely make sense.
String[] highlt = tmp.get(snippetFieldAry[j]);
if (highlt != null && highlt.length > 0) {
for (int i = 0; i < highlt.length; i++) {
sb.append(highlt[i]);
sb.append(" . ");
}
}
}
snippet = sb.toString();
}
}
// If summaries not enabled or summary generation failed, use full content.
if (snippet == null) {
snippet = getConcatenated(sdoc, snippetFieldSpec);
}
// Create a Carrot2 document
Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
// Store Solr id of the document, we need it to map document instances
// found in clusters back to identifiers.
carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
// Set language
if (StringUtils.isNotBlank(languageField)) {
Collection<Object> languages = sdoc.getFieldValues(languageField);
if (languages != null) {
// Use the first Carrot2-supported language
for (Object l : languages) {
String lang = ObjectUtils.toString(l, "");
if (languageCodeMap.containsKey(lang)) {
lang = languageCodeMap.get(lang);
}
// language variants, such as 'zh-cn', but Carrot2 uses underscores.
if (lang.indexOf('-') > 0) {
lang = lang.replace('-', '_');
}
// If the language is supported by Carrot2, we'll get a non-null value
final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
if (carrot2Language != null) {
carrotDocument.setLanguage(carrot2Language);
break;
}
}
}
}
// Add custom fields
if (customFields != null) {
for (Entry<String, String> entry : customFields.entrySet()) {
carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
}
}
result.add(carrotDocument);
}
return result;
}
use of org.apache.solr.search.DocList in project lucene-solr by apache.
the class TextResponseWriter method writeVal.
public final void writeVal(String name, Object val) throws IOException {
// go in order of most common to least common
if (val == null) {
writeNull(name);
} else if (val instanceof String) {
writeStr(name, val.toString(), true);
// micro-optimization... using toString() avoids a cast first
} else if (val instanceof IndexableField) {
IndexableField f = (IndexableField) val;
SchemaField sf = schema.getFieldOrNull(f.name());
if (sf != null) {
sf.getType().write(this, name, f);
} else {
writeStr(name, f.stringValue(), true);
}
} else if (val instanceof Number) {
writeNumber(name, (Number) val);
} else if (val instanceof Boolean) {
writeBool(name, (Boolean) val);
} else if (val instanceof Date) {
writeDate(name, (Date) val);
} else if (val instanceof Document) {
SolrDocument doc = DocsStreamer.convertLuceneDocToSolrDoc((Document) val, schema);
writeSolrDocument(name, doc, returnFields, 0);
} else if (val instanceof SolrDocument) {
writeSolrDocument(name, (SolrDocument) val, returnFields, 0);
} else if (val instanceof ResultContext) {
// requires access to IndexReader
writeDocuments(name, (ResultContext) val);
} else if (val instanceof DocList) {
// Should not happen normally
ResultContext ctx = new BasicResultContext((DocList) val, returnFields, null, null, req);
writeDocuments(name, ctx);
// }
// else if (val instanceof DocSet) {
// how do we know what fields to read?
// todo: have a DocList/DocSet wrapper that
// restricts the fields to write...?
} else if (val instanceof SolrDocumentList) {
writeSolrDocumentList(name, (SolrDocumentList) val, returnFields);
} else if (val instanceof Map) {
writeMap(name, (Map) val, false, true);
} else if (val instanceof NamedList) {
writeNamedList(name, (NamedList) val);
} else if (val instanceof Path) {
writeStr(name, ((Path) val).toAbsolutePath().toString(), true);
} else if (val instanceof IteratorWriter) {
writeIterator((IteratorWriter) val);
} else if (val instanceof Iterable) {
writeArray(name, ((Iterable) val).iterator());
} else if (val instanceof Object[]) {
writeArray(name, (Object[]) val);
} else if (val instanceof Iterator) {
writeArray(name, (Iterator) val);
} else if (val instanceof byte[]) {
byte[] arr = (byte[]) val;
writeByteArr(name, arr, 0, arr.length);
} else if (val instanceof BytesRef) {
BytesRef arr = (BytesRef) val;
writeByteArr(name, arr.bytes, arr.offset, arr.length);
} else if (val instanceof EnumFieldValue) {
writeStr(name, val.toString(), true);
} else if (val instanceof WriteableValue) {
((WriteableValue) val).write(name, this);
} else if (val instanceof MapWriter) {
writeMap((MapWriter) val);
} else if (val instanceof MapSerializable) {
//todo find a better way to reuse the map more efficiently
writeMap(name, ((MapSerializable) val).toMap(new LinkedHashMap<>()), false, true);
} else {
// default... for debugging only
writeStr(name, val.getClass().getName() + ':' + val.toString(), true);
}
}
use of org.apache.solr.search.DocList in project lucene-solr by apache.
the class TextResponseWriter method writeDocuments.
public final void writeDocuments(String name, ResultContext res) throws IOException {
DocList ids = res.getDocList();
Iterator<SolrDocument> docsStreamer = res.getProcessedDocuments();
writeStartDocumentList(name, ids.offset(), ids.size(), ids.matches(), res.wantsScores() ? new Float(ids.maxScore()) : null);
int idx = 0;
while (docsStreamer.hasNext()) {
writeSolrDocument(null, docsStreamer.next(), res.getReturnFields(), idx);
idx++;
}
writeEndDocumentList();
}
use of org.apache.solr.search.DocList in project lucene-solr by apache.
the class ChildDocTransformer method transform.
@Override
public void transform(SolrDocument doc, int docid, float score) {
FieldType idFt = idField.getType();
Object parentIdField = doc.getFirstValue(idField.getName());
String parentIdExt = parentIdField instanceof IndexableField ? idFt.toExternal((IndexableField) parentIdField) : parentIdField.toString();
try {
Query parentQuery = idFt.getFieldQuery(null, idField, parentIdExt);
Query query = new ToChildBlockJoinQuery(parentQuery, parentsFilter);
DocList children = context.getSearcher().getDocList(query, childFilterQuery, new Sort(), 0, limit);
if (children.matches() > 0) {
DocIterator i = children.iterator();
while (i.hasNext()) {
Integer childDocNum = i.next();
Document childDoc = context.getSearcher().doc(childDocNum);
SolrDocument solrChildDoc = DocsStreamer.convertLuceneDocToSolrDoc(childDoc, schema);
// TODO: future enhancement...
// support an fl local param in the transformer, which is used to build
// a private ReturnFields instance that we use to prune unwanted field
// names from solrChildDoc
doc.addChildDocument(solrChildDoc);
}
}
} catch (IOException e) {
doc.put(name, "Could not fetch child Documents");
}
}
use of org.apache.solr.search.DocList in project lucene-solr by apache.
the class XLSXWriter method writeResponse.
public void writeResponse(OutputStream out, LinkedHashMap<String, String> colNamesMap, LinkedHashMap<String, Integer> colWidthsMap) throws IOException {
SolrParams params = req.getParams();
Collection<String> fields = returnFields.getRequestedFieldNames();
Object responseObj = rsp.getValues().get("response");
boolean returnOnlyStored = false;
if (fields == null || returnFields.hasPatternMatching()) {
if (responseObj instanceof SolrDocumentList) {
// get the list of fields from the SolrDocumentList
if (fields == null) {
fields = new LinkedHashSet<String>();
}
for (SolrDocument sdoc : (SolrDocumentList) responseObj) {
fields.addAll(sdoc.getFieldNames());
}
} else {
// get the list of fields from the index
Iterable<String> all = req.getSearcher().getFieldNames();
if (fields == null) {
fields = Sets.newHashSet(all);
} else {
Iterables.addAll(fields, all);
}
}
if (returnFields.wantsScore()) {
fields.add("score");
} else {
fields.remove("score");
}
returnOnlyStored = true;
}
for (String field : fields) {
if (!returnFields.wantsField(field)) {
continue;
}
if (field.equals("score")) {
XLField xlField = new XLField();
xlField.name = "score";
xlFields.put("score", xlField);
continue;
}
SchemaField sf = schema.getFieldOrNull(field);
if (sf == null) {
FieldType ft = new StrField();
sf = new SchemaField(field, ft);
}
// Return only stored fields, unless an explicit field list is specified
if (returnOnlyStored && sf != null && !sf.stored()) {
continue;
}
XLField xlField = new XLField();
xlField.name = field;
xlField.sf = sf;
xlFields.put(field, xlField);
}
wb.addRow();
//write header
for (XLField xlField : xlFields.values()) {
String printName = xlField.name;
int colWidth = 14;
String niceName = colNamesMap.get(xlField.name);
if (niceName != null) {
printName = niceName;
}
Integer niceWidth = colWidthsMap.get(xlField.name);
if (niceWidth != null) {
colWidth = niceWidth.intValue();
}
writeStr(xlField.name, printName, false);
wb.setColWidth(colWidth);
wb.setHeaderCell();
}
wb.setHeaderRow();
wb.addRow();
if (responseObj instanceof ResultContext) {
writeDocuments(null, (ResultContext) responseObj);
} else if (responseObj instanceof DocList) {
ResultContext ctx = new BasicResultContext((DocList) responseObj, returnFields, null, null, req);
writeDocuments(null, ctx);
} else if (responseObj instanceof SolrDocumentList) {
writeSolrDocumentList(null, (SolrDocumentList) responseObj, returnFields);
}
wb.flush(out);
wb = null;
}
Aggregations