Search in sources :

Example 1 with SolrHighlighter

Use of org.apache.solr.highlight.SolrHighlighter in project lucene-solr by Apache.

Class CarrotClusteringEngine, method getDocuments.

/**
   * Converts Solr documents into Carrot2 documents ready for clustering.
   *
   * Title, snippet, URL, language and custom field names are read from the
   * request parameters. When summaries are requested and a highlighter is
   * available, the snippet of each document is built from highlighter
   * fragments; otherwise the concatenated content of the snippet fields is used.
   *
   * @param solrDocList the Solr documents to convert
   * @param docIds maps Solr documents to the doc ids handed to the highlighter;
   *        when {@code null}, no summaries are produced
   * @param query the query passed to the highlighter
   * @param sreq the current request; supplies parameters, core and searcher
   * @return one Carrot2 document per Solr document, in iteration order
   */
private List<Document> getDocuments(SolrDocumentList solrDocList, Map<SolrDocument, Integer> docIds, Query query, final SolrQueryRequest sreq) throws IOException {
    final SolrParams params = sreq.getParams();
    final SolrCore solrCore = sreq.getCore();
    final String urlFieldName = params.get(CarrotParams.URL_FIELD_NAME, "url");
    final String titleFields = params.get(CarrotParams.TITLE_FIELD_NAME, "title");
    // Snippet fields default to the title fields.
    final String snippetFields = params.get(CarrotParams.SNIPPET_FIELD_NAME, titleFields);
    final String langFieldName = params.get(CarrotParams.LANGUAGE_FIELD_NAME, null);
    final boolean hasLanguageField = StringUtils.isNotBlank(langFieldName);
    // Solr field name -> Carrot2 custom field name
    final Map<String, String> customFieldNames = getCustomFieldsMap(solrParamsOf(params));
    // Language code mappings are only parsed when a language field is configured.
    final Map<String, String> codeMap = new HashMap<>();
    if (hasLanguageField) {
        final String rawMappings = params.get(CarrotParams.LANGUAGE_CODE_MAP, "");
        for (String mapping : rawMappings.split("[, ]")) {
            final String[] parts = mapping.split(":");
            final boolean wellFormed = parts.length == 2 && StringUtils.isNotBlank(parts[0]) && StringUtils.isNotBlank(parts[1]);
            if (!wellFormed) {
                log.warn("Unsupported format for " + CarrotParams.LANGUAGE_CODE_MAP + ": '" + mapping + "'. Skipping this mapping.");
                continue;
            }
            codeMap.put(parts[0], parts[1]);
        }
    }
    // Set up the highlighter-based summary generation, if requested.
    boolean summarize = params.getBool(CarrotParams.PRODUCE_SUMMARY, false);
    SolrHighlighter summaryHighlighter = null;
    SolrQueryRequest highlightReq = null;
    String[] snippetFieldNames = null;
    if (summarize) {
        summaryHighlighter = HighlightComponent.getHighlighter(solrCore);
        if (summaryHighlighter == null) {
            log.warn("No highlighter configured, cannot produce summary");
            summarize = false;
        } else {
            final Map<String, Object> hlArgs = new HashMap<>();
            snippetFieldNames = snippetFields.split("[, ]");
            hlArgs.put(HighlightParams.FIELDS, snippetFieldNames);
            hlArgs.put(HighlightParams.HIGHLIGHT, "true");
            // Empty pre/post markers: only the fragment text is wanted, not the markup.
            hlArgs.put(HighlightParams.SIMPLE_PRE, "");
            hlArgs.put(HighlightParams.SIMPLE_POST, "");
            // Carrot-specific settings win over the generic highlighting parameters.
            final int fallbackFragSize = params.getInt(HighlightParams.FRAGSIZE, 100);
            hlArgs.put(HighlightParams.FRAGSIZE, params.getInt(CarrotParams.SUMMARY_FRAGSIZE, fallbackFragSize));
            final int fallbackSnippets = params.getInt(HighlightParams.SNIPPETS, 1);
            hlArgs.put(HighlightParams.SNIPPETS, params.getInt(CarrotParams.SUMMARY_SNIPPETS, fallbackSnippets));
            highlightReq = new LocalSolrQueryRequest(solrCore, query.toString(), "", 0, 1, hlArgs) {

                // Delegate to the searcher of the enclosing request.
                @Override
                public SolrIndexSearcher getSearcher() {
                    return sreq.getSearcher();
                }
            };
        }
    }
    final List<Document> carrotDocs = new ArrayList<>(solrDocList.size());
    // Reused single-element holders for the one-document slices given to the highlighter.
    final float[] unitScore = { 1.0f };
    final int[] singleDocId = new int[1];
    for (SolrDocument solrDoc : solrDocList) {
        String snippet = null;
        // See comment in ClusteringComponent#finishStage().
        if (summarize && docIds != null) {
            singleDocId[0] = docIds.get(solrDoc).intValue();
            final DocList oneDoc = new DocSlice(0, 1, singleDocId, unitScore, 1, 1.0f);
            final NamedList<Object> highlighted = summaryHighlighter.doHighlighting(oneDoc, query, highlightReq, snippetFieldNames);
            // Exactly one entry is expected because a single document was highlighted.
            if (highlighted != null && highlighted.size() == 1) {
                @SuppressWarnings("unchecked") final NamedList<String[]> perField = (NamedList<String[]>) highlighted.getVal(0);
                final StringBuilder joined = new StringBuilder();
                for (String fieldName : snippetFieldNames) {
                    final String[] fragments = perField.get(fieldName);
                    if (fragments == null) {
                        continue;
                    }
                    // Separate fragments with a period, so that Carrot2 does not create
                    // cross-fragment phrases; such phrases rarely make sense.
                    for (String fragment : fragments) {
                        joined.append(fragment).append(" . ");
                    }
                }
                snippet = joined.toString();
            }
        }
        // No summary available (disabled or generation failed): fall back to full content.
        if (snippet == null) {
            snippet = getConcatenated(solrDoc, snippetFields);
        }
        final String title = getConcatenated(solrDoc, titleFields);
        final String url = ObjectUtils.toString(solrDoc.getFieldValue(urlFieldName), "");
        final Document carrotDoc = new Document(title, snippet, url);
        // Keep the Solr id on the Carrot2 document so that documents found in
        // clusters can be mapped back to their identifiers.
        carrotDoc.setField(SOLR_DOCUMENT_ID, solrDoc.getFieldValue(idFieldName));
        if (hasLanguageField) {
            final Collection<Object> langValues = solrDoc.getFieldValues(langFieldName);
            if (langValues != null) {
                // The first value that resolves to a Carrot2-supported language wins.
                for (Object value : langValues) {
                    String code = ObjectUtils.toString(value, "");
                    if (codeMap.containsKey(code)) {
                        code = codeMap.get(code);
                    }
                    // Language variants may be written with hyphens (e.g. 'zh-cn'),
                    // but Carrot2 uses underscores.
                    if (code.indexOf('-') > 0) {
                        code = code.replace('-', '_');
                    }
                    // A non-null result means Carrot2 supports the language.
                    final LanguageCode resolved = LanguageCode.forISOCode(code);
                    if (resolved != null) {
                        carrotDoc.setLanguage(resolved);
                        break;
                    }
                }
            }
        }
        // Copy custom fields under their Carrot2 names.
        if (customFieldNames != null) {
            for (Entry<String, String> mapping : customFieldNames.entrySet()) {
                carrotDoc.setField(mapping.getValue(), solrDoc.getFieldValue(mapping.getKey()));
            }
        }
        carrotDocs.add(carrotDoc);
    }
    return carrotDocs;
}
Also used : Query(org.apache.lucene.search.Query) HashMap(java.util.HashMap) SolrCore(org.apache.solr.core.SolrCore) ArrayList(java.util.ArrayList) Document(org.carrot2.core.Document) SolrDocument(org.apache.solr.common.SolrDocument) DocSlice(org.apache.solr.search.DocSlice) LanguageCode(org.carrot2.core.LanguageCode) SolrDocument(org.apache.solr.common.SolrDocument) NamedList(org.apache.solr.common.util.NamedList) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) LocalSolrQueryRequest(org.apache.solr.request.LocalSolrQueryRequest) SolrParams(org.apache.solr.common.params.SolrParams) DocList(org.apache.solr.search.DocList)

Example 2 with SolrHighlighter

Use of org.apache.solr.highlight.SolrHighlighter in project lucene-solr by Apache.

Class HighlightComponent, method process.

/**
 * Runs highlighting for the current request when {@code rb.doHighlights} is set.
 *
 * The highlight query is taken from the response builder; if none is set it is
 * obtained from the query parser when one exists, otherwise from
 * {@code rb.getQuery()}, and stored back on the builder. A non-null
 * highlighting result is added to the response under {@code "highlighting"}.
 *
 * @param rb the response builder for the current request
 * @throws SolrException with {@code BAD_REQUEST} if resolving the highlight
 *         query through the query parser fails
 */
@Override
public void process(ResponseBuilder rb) throws IOException {
    if (!rb.doHighlights) {
        return;
    }
    final SolrQueryRequest request = rb.req;
    final SolrParams requestParams = request.getParams();
    final SolrHighlighter solrHighlighter = getHighlighter(requestParams);
    //TODO: get from builder by default?
    String[] fallbackFields = null;
    if (rb.getQparser() != null) {
        fallbackFields = rb.getQparser().getDefaultHighlightFields();
    }
    Query hlQuery = rb.getHighlightQuery();
    if (hlQuery == null) {
        if (rb.getQparser() == null) {
            hlQuery = rb.getQuery();
            rb.setHighlightQuery(hlQuery);
        } else {
            try {
                hlQuery = rb.getQparser().getHighlightQuery();
                rb.setHighlightQuery(hlQuery);
            } catch (Exception e) {
                throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, e);
            }
        }
    }
    // No highlighting if there is no query -- consider q.alt=*:*
    if (hlQuery == null) {
        return;
    }
    final NamedList<?> highlighting = solrHighlighter.doHighlighting(rb.getResults().docList, hlQuery, request, fallbackFields);
    if (highlighting != null) {
        // TODO ???? add this directly to the response?
        rb.rsp.add("highlighting", highlighting);
    }
}
Also used : UnifiedSolrHighlighter(org.apache.solr.highlight.UnifiedSolrHighlighter) DefaultSolrHighlighter(org.apache.solr.highlight.DefaultSolrHighlighter) SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) PostingsSolrHighlighter(org.apache.solr.highlight.PostingsSolrHighlighter) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) Query(org.apache.lucene.search.Query) NamedList(org.apache.solr.common.util.NamedList) SolrParams(org.apache.solr.common.params.SolrParams) SolrException(org.apache.solr.common.SolrException) IOException(java.io.IOException) SolrException(org.apache.solr.common.SolrException)

Example 3 with SolrHighlighter

Use of org.apache.solr.highlight.SolrHighlighter in project Solbase by Photobucket.

Class SolbaseComponent, method process.

/**
 * Bulk-loads the documents of the current result list.
 *
 * Collects the doc ids from the result list, sets the
 * {@code IndexReader.firstPhase} flag according to the sharding setup, builds
 * a filter of the fields to load (return fields, highlight fields and the
 * unique key) and triggers a single document fetch with a
 * {@code SolbaseFieldSelector}. Finally the caches kept for the reader's
 * index name are flushed via {@code ReaderCache.flushThreadLocalCaches}.
 *
 * @param rb the response builder for the current request
 * @throws IOException if reading from the index fails
 */
public void process(ResponseBuilder rb) throws IOException {
    DocList list = rb.getResults().docList;
    DocIterator it = list.iterator();
    List<Integer> docIds = new ArrayList<Integer>(list.size());
    while (it.hasNext()) {
        docIds.add(it.next());
    }
    IndexReader reader = (IndexReader) ((SolrIndexReader) rb.req.getSearcher().getIndexReader()).getWrappedReader();
    SolrQueryRequest req = rb.req;
    SolrParams params = req.getParams();
    String ids = params.get(ShardParams.IDS);
    // first phase of sharding only tries to fetch docids and scores which are already in tv
    if (SolbaseShardUtil.getNumShard() != 0) {
        // Sharded: it is the first phase only while no explicit ids were requested.
        IndexReader.firstPhase.set(ids == null);
    } else {
        // it's always false in case of stand alone
        IndexReader.firstPhase.set(false);
    }
    logger.debug(reader.getIndexName() + " : Fetching " + docIds.size() + " Docs");
    if (!docIds.isEmpty()) {
        // A null filter is handed to the selector when no return fields are set.
        List<byte[]> fieldFilter = null;
        Set<String> returnFields = rb.rsp.getReturnFields();
        if (returnFields != null) {
            // copy return fields list
            fieldFilter = new ArrayList<byte[]>(returnFields.size());
            for (String field : returnFields) {
                fieldFilter.add(Bytes.toBytes(field));
            }
            // add highlight fields
            SolrHighlighter highlighter = req.getCore().getHighlighter();
            if (highlighter.isHighlightingEnabled(params)) {
                for (String field : highlighter.getHighlightFields(rb.getQuery(), req, null)) {
                    if (!returnFields.contains(field)) {
                        fieldFilter.add(Bytes.toBytes(field));
                    }
                }
            }
            // fetch unique key if one exists.
            SchemaField keyField = req.getSearcher().getSchema().getUniqueKeyField();
            // Compare by field name: returnFields holds names (Strings), so a
            // lookup with the SchemaField object itself could never match and
            // the key would be added even when it is already a return field.
            if (null != keyField && !returnFields.contains(keyField.getName())) {
                fieldFilter.add(Bytes.toBytes(keyField.getName()));
            }
        }
        FieldSelector selector = new SolbaseFieldSelector(docIds, fieldFilter);
        // This will bulk load these docs
        req.getSearcher().getReader().document(docIds.get(0), selector);
    }
    ReaderCache.flushThreadLocalCaches(reader.getIndexName());
}
Also used : DocIterator(org.apache.solr.search.DocIterator) ArrayList(java.util.ArrayList) SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) SchemaField(org.apache.solr.schema.SchemaField) SolrQueryRequest(org.apache.solr.request.SolrQueryRequest) SolrIndexReader(org.apache.solr.search.SolrIndexReader) IndexReader(org.solbase.lucenehbase.IndexReader) SolrParams(org.apache.solr.common.params.SolrParams) FieldSelector(org.apache.lucene.document.FieldSelector) DocList(org.apache.solr.search.DocList)

Example 4 with SolrHighlighter

Use of org.apache.solr.highlight.SolrHighlighter in project lucene-solr by Apache.

Class SolrPluginUtils, method optimizePreFetchDocs.

/**
   * Loads the given documents into the index searcher's document cache ahead
   * of time.
   *
   * This step is entirely optional. Reasons to perform it:
   *
   * <ul>
   *     <li>Document-retrieval costs are concentrated in one place, which
   *     helps detailed performance measurement.</li>
   *
   *     <li>The fields needed by later subtasks, such as response writing and
   *     highlighting, are determined up front, which minimizes the chance
   *     that many needed fields will be loaded lazily (loading all required
   *     fields normally is more efficient).</li>
   * </ul>
   *
   * The method does nothing when lazy field loading is disabled, or when the
   * response's return fields report no Lucene field names ({@code null}).
   */
public static void optimizePreFetchDocs(ResponseBuilder rb, DocList docs, Query query, SolrQueryRequest req, SolrQueryResponse res) throws IOException {
    final SolrIndexSearcher indexSearcher = req.getSearcher();
    if (!indexSearcher.getDocFetcher().isLazyFieldLoadingEnabled()) {
        // nothing to do
        return;
    }
    Set<String> fieldsToLoad = res.getReturnFields().getLuceneFieldNames();
    if (fieldsToLoad == null) {
        return;
    }
    if (rb.doHighlights) {
        // Work on a copy so the response's own field set is not modified.
        fieldsToLoad = new HashSet<>(fieldsToLoad);
        // Highlighting needs its fields loaded as well.
        final SolrHighlighter solrHighlighter = HighlightComponent.getHighlighter(req.getCore());
        for (String highlightField : solrHighlighter.getHighlightFields(query, req, null)) {
            fieldsToLoad.add(highlightField);
        }
        // Include the unique key field if the schema defines one.
        final SchemaField uniqueKey = indexSearcher.getSchema().getUniqueKeyField();
        if (uniqueKey != null) {
            fieldsToLoad.add(uniqueKey.getName());
        }
    }
    // Fetch every document once.
    final DocIterator docIter = docs.iterator();
    for (int remaining = docs.size(); remaining > 0; remaining--) {
        indexSearcher.doc(docIter.nextDoc(), fieldsToLoad);
    }
}
Also used : SolrHighlighter(org.apache.solr.highlight.SolrHighlighter) SchemaField(org.apache.solr.schema.SchemaField) DocIterator(org.apache.solr.search.DocIterator) ReturnFields(org.apache.solr.search.ReturnFields) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher)

Aggregations

SolrHighlighter (org.apache.solr.highlight.SolrHighlighter)4 SolrParams (org.apache.solr.common.params.SolrParams)3 SolrQueryRequest (org.apache.solr.request.SolrQueryRequest)3 ArrayList (java.util.ArrayList)2 Query (org.apache.lucene.search.Query)2 NamedList (org.apache.solr.common.util.NamedList)2 SchemaField (org.apache.solr.schema.SchemaField)2 DocIterator (org.apache.solr.search.DocIterator)2 DocList (org.apache.solr.search.DocList)2 SolrIndexSearcher (org.apache.solr.search.SolrIndexSearcher)2 IOException (java.io.IOException)1 HashMap (java.util.HashMap)1 FieldSelector (org.apache.lucene.document.FieldSelector)1 SolrDocument (org.apache.solr.common.SolrDocument)1 SolrException (org.apache.solr.common.SolrException)1 SolrCore (org.apache.solr.core.SolrCore)1 DefaultSolrHighlighter (org.apache.solr.highlight.DefaultSolrHighlighter)1 PostingsSolrHighlighter (org.apache.solr.highlight.PostingsSolrHighlighter)1 UnifiedSolrHighlighter (org.apache.solr.highlight.UnifiedSolrHighlighter)1 LocalSolrQueryRequest (org.apache.solr.request.LocalSolrQueryRequest)1