use of org.apache.solr.highlight.SolrHighlighter in project lucene-solr by apache.
the class CarrotClusteringEngine method getDocuments.
/**
* Prepares Carrot2 documents for clustering.
*/
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
SolrHighlighter highlighter = null;
SolrParams solrParams = sreq.getParams();
SolrCore core = sreq.getCore();
String urlField = solrParams.get(CarrotParams.URL_FIELD_NAME, "url");
String titleFieldSpec = solrParams.get(CarrotParams.TITLE_FIELD_NAME, "title");
String snippetFieldSpec = solrParams.get(CarrotParams.SNIPPET_FIELD_NAME, titleFieldSpec);
String languageField = solrParams.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
// Maps Solr field names to Carrot2 custom field names
Map<String, String> customFields = getCustomFieldsMap(solrParams);
// Parse language code map string into a map
Map<String, String> languageCodeMap = new HashMap<>();
if (StringUtils.isNotBlank(languageField)) {
for (String pair : solrParams.get(CarrotParams.LANGUAGE_CODE_MAP, "").split("[, ]")) {
final String[] split = pair.split(":");
if (split.length == 2 && StringUtils.isNotBlank(split[0]) && StringUtils.isNotBlank(split[1])) {
languageCodeMap.put(split[0], split[1]);
} else {
log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + pair + "'. Skipping this mapping.");
}
}
}
// Get the documents
boolean produceSummary = solrParams.getBool(CarrotParams.PRODUCE_SUMMARY, false);
SolrQueryRequest req = null;
String[] snippetFieldAry = null;
if (produceSummary) {
highlighter = HighlightComponent.getHighlighter(core);
if (highlighter != null) {
Map<String, Object> args = new HashMap<>();
snippetFieldAry = snippetFieldSpec.split("[, ]");
args.put(HighlightParams.FIELDS, snippetFieldAry);
args.put(HighlightParams.HIGHLIGHT, "true");
//we don't care about actually highlighting the area
args.put(HighlightParams.SIMPLE_PRE, "");
args.put(HighlightParams.SIMPLE_POST, "");
args.put(HighlightParams.FRAGSIZE, solrParams.getInt(CarrotParams.SUMMARY_FRAGSIZE, solrParams.getInt(HighlightParams.FRAGSIZE, 100)));
args.put(HighlightParams.SNIPPETS, solrParams.getInt(CarrotParams.SUMMARY_SNIPPETS, solrParams.getInt(HighlightParams.SNIPPETS, 1)));
req = new LocalSolrQueryRequest(core, query.toString(), "", 0, 1, args) {
@Override
public SolrIndexSearcher getSearcher() {
return sreq.getSearcher();
}
};
} else {
log.warn("No highlighter configured, cannot produce summary");
produceSummary = false;
}
}
Iterator<SolrDocument> docsIter = solrDocList.iterator();
List<Document> result = new ArrayList<>(solrDocList.size());
float[] scores = { 1.0f };
int[] docsHolder = new int[1];
Query theQuery = query;
while (docsIter.hasNext()) {
SolrDocument sdoc = docsIter.next();
String snippet = null;
// See comment in ClusteringComponent#finishStage().
if (produceSummary && docIds != null) {
docsHolder[0] = docIds.get(sdoc).intValue();
DocList docAsList = new DocSlice(0, 1, docsHolder, scores, 1, 1.0f);
NamedList<Object> highlights = highlighter.doHighlighting(docAsList, theQuery, req, snippetFieldAry);
if (highlights != null && highlights.size() == 1) {
// should only be one value given our setup
// should only be one document
@SuppressWarnings("unchecked") NamedList<String[]> tmp = (NamedList<String[]>) highlights.getVal(0);
final StringBuilder sb = new StringBuilder();
for (int j = 0; j < snippetFieldAry.length; j++) {
// Join fragments with a period, so that Carrot2 does not create
// cross-fragment phrases, such phrases rarely make sense.
String[] highlt = tmp.get(snippetFieldAry[j]);
if (highlt != null && highlt.length > 0) {
for (int i = 0; i < highlt.length; i++) {
sb.append(highlt[i]);
sb.append(" . ");
}
}
}
snippet = sb.toString();
}
}
// If summaries not enabled or summary generation failed, use full content.
if (snippet == null) {
snippet = getConcatenated(sdoc, snippetFieldSpec);
}
// Create a Carrot2 document
Document carrotDocument = new Document(getConcatenated(sdoc, titleFieldSpec), snippet, ObjectUtils.toString(sdoc.getFieldValue(urlField), ""));
// Store Solr id of the document, we need it to map document instances
// found in clusters back to identifiers.
carrotDocument.setField(SOLR_DOCUMENT_ID, sdoc.getFieldValue(idFieldName));
// Set language
if (StringUtils.isNotBlank(languageField)) {
Collection<Object> languages = sdoc.getFieldValues(languageField);
if (languages != null) {
// Use the first Carrot2-supported language
for (Object l : languages) {
String lang = ObjectUtils.toString(l, "");
if (languageCodeMap.containsKey(lang)) {
lang = languageCodeMap.get(lang);
}
// language variants, such as 'zh-cn', but Carrot2 uses underscores.
if (lang.indexOf('-') > 0) {
lang = lang.replace('-', '_');
}
// If the language is supported by Carrot2, we'll get a non-null value
final LanguageCode carrot2Language = LanguageCode.forISOCode(lang);
if (carrot2Language != null) {
carrotDocument.setLanguage(carrot2Language);
break;
}
}
}
}
// Add custom fields
if (customFields != null) {
for (Entry<String, String> entry : customFields.entrySet()) {
carrotDocument.setField(entry.getValue(), sdoc.getFieldValue(entry.getKey()));
}
}
result.add(carrotDocument);
}
return result;
}
use of org.apache.solr.highlight.SolrHighlighter in project lucene-solr by apache.
the class HighlightComponent method process.
@Override
public void process(ResponseBuilder rb) throws IOException {
if (rb.doHighlights) {
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
SolrHighlighter highlighter = getHighlighter(params);
//TODO: get from builder by default?
String[] defaultHighlightFields = rb.getQparser() != null ? rb.getQparser().getDefaultHighlightFields() : null;
Query highlightQuery = rb.getHighlightQuery();
if (highlightQuery == null) {
if (rb.getQparser() != null) {
try {
highlightQuery = rb.getQparser().getHighlightQuery();
rb.setHighlightQuery(highlightQuery);
} catch (Exception e) {
throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
}
} else {
highlightQuery = rb.getQuery();
rb.setHighlightQuery(highlightQuery);
}
}
// No highlighting if there is no query -- consider q.alt=*:*
if (highlightQuery != null) {
NamedList sumData = highlighter.doHighlighting(rb.getResults().docList, highlightQuery, req, defaultHighlightFields);
if (sumData != null) {
// TODO ???? add this directly to the response?
rb.rsp.add("highlighting", sumData);
}
}
}
}
use of org.apache.solr.highlight.SolrHighlighter in project Solbase by Photobucket.
the class SolbaseComponent method process.
public void process(ResponseBuilder rb) throws IOException {
DocList list = rb.getResults().docList;
DocIterator it = list.iterator();
List<Integer> docIds = new ArrayList<Integer>(list.size());
while (it.hasNext()) docIds.add(it.next());
IndexReader reader = (IndexReader) ((SolrIndexReader) rb.req.getSearcher().getIndexReader()).getWrappedReader();
SolrQueryRequest req = rb.req;
SolrParams params = req.getParams();
String ids = params.get(ShardParams.IDS);
// first phase of sharding only tries to fetch docids and scores which are already in tv
if (SolbaseShardUtil.getNumShard() != 0) {
if (ids != null) {
IndexReader.firstPhase.set(false);
} else {
IndexReader.firstPhase.set(true);
}
} else {
// it's always false in case of stand alone
IndexReader.firstPhase.set(false);
}
logger.debug(reader.getIndexName() + " : Fetching " + docIds.size() + " Docs");
if (docIds.size() > 0) {
List<byte[]> fieldFilter = null;
Set<String> returnFields = rb.rsp.getReturnFields();
if (returnFields != null) {
// copy return fields list
fieldFilter = new ArrayList<byte[]>(returnFields.size());
for (String field : returnFields) {
fieldFilter.add(Bytes.toBytes(field));
}
// add highlight fields
SolrHighlighter highligher = rb.req.getCore().getHighlighter();
if (highligher.isHighlightingEnabled(rb.req.getParams())) {
for (String field : highligher.getHighlightFields(rb.getQuery(), rb.req, null)) if (!returnFields.contains(field))
fieldFilter.add(Bytes.toBytes(field));
}
// fetch unique key if one exists.
SchemaField keyField = rb.req.getSearcher().getSchema().getUniqueKeyField();
if (null != keyField)
if (!returnFields.contains(keyField))
fieldFilter.add(Bytes.toBytes(keyField.getName()));
}
FieldSelector selector = new SolbaseFieldSelector(docIds, fieldFilter);
// This will bulk load these docs
rb.req.getSearcher().getReader().document(docIds.get(0), selector);
}
ReaderCache.flushThreadLocalCaches(reader.getIndexName());
}
use of org.apache.solr.highlight.SolrHighlighter in project lucene-solr by apache.
the class SolrPluginUtils method optimizePreFetchDocs.
/**
* Pre-fetch documents into the index searcher's document cache.
*
* This is an entirely optional step which you might want to perform for
* the following reasons:
*
* <ul>
* <li>Locates the document-retrieval costs in one spot, which helps
* detailed performance measurement</li>
*
* <li>Determines a priori what fields will be needed to be fetched by
* various subtasks, like response writing and highlighting. This
* minimizes the chance that many needed fields will be loaded lazily.
* (it is more efficient to load all the field we require normally).</li>
* </ul>
*
* If lazy field loading is disabled, this method does nothing.
*/
public static void optimizePreFetchDocs(ResponseBuilder rb, DocList docs, Query query, SolrQueryRequest req, SolrQueryResponse res) throws IOException {
SolrIndexSearcher searcher = req.getSearcher();
if (!searcher.getDocFetcher().isLazyFieldLoadingEnabled()) {
// nothing to do
return;
}
ReturnFields returnFields = res.getReturnFields();
if (returnFields.getLuceneFieldNames() != null) {
Set<String> fieldFilter = returnFields.getLuceneFieldNames();
if (rb.doHighlights) {
// copy return fields list
fieldFilter = new HashSet<>(fieldFilter);
// add highlight fields
SolrHighlighter highlighter = HighlightComponent.getHighlighter(req.getCore());
for (String field : highlighter.getHighlightFields(query, req, null)) fieldFilter.add(field);
// fetch unique key if one exists.
SchemaField keyField = searcher.getSchema().getUniqueKeyField();
if (null != keyField)
fieldFilter.add(keyField.getName());
}
// get documents
DocIterator iter = docs.iterator();
for (int i = 0; i < docs.size(); i++) {
searcher.doc(iter.nextDoc(), fieldFilter);
}
}
}
Aggregations