Search in sources :

Example 1 with FieldQuery

use of org.apache.lucene.search.vectorhighlight.FieldQuery in project elasticsearch by elastic.

The method highlight of the class FastVectorHighlighter.

/**
 * Highlights one field of the current hit using Lucene's fast vector highlighter.
 *
 * <p>A {@code HighlighterEntry} is kept in the hit context cache under {@code CACHE_KEY}; it holds
 * the field queries, the Lucene highlighter instance and one {@code MapperHighlightEntry}
 * (fragment list builder + fragments builder) per mapper, all created on first use.
 *
 * @param highlighterContext carries the field, its mapper, the search context, the hit context
 *                           and the query to highlight
 * @return the highlighted fragments; when nothing matched and {@code noMatchSize} is positive, a
 *         fragment built from offset 0 to {@code noMatchSize}; otherwise {@code null}
 * @throws IllegalArgumentException     if {@code canHighlight(mapper)} is false
 * @throws FetchPhaseExecutionException wrapping any exception raised while highlighting
 */
@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper mapper = highlighterContext.mapper;

    if (canHighlight(mapper) == false) {
        throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
    }

    Encoder encoder;
    if (field.fieldOptions().encoder().equals("html")) {
        encoder = HighlightUtils.Encoders.HTML;
    } else {
        encoder = HighlightUtils.Encoders.DEFAULT;
    }

    // Lazily register the shared highlighter state in the hit context cache.
    if (hitContext.cache().containsKey(CACHE_KEY) == false) {
        hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
    }
    HighlighterEntry sharedEntry = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);

    try {
        // One field query is cached per "require field match" flavour. It is built against the
        // top level reader so the rewritten query can be reused across hits (and across readers).
        final boolean requireFieldMatch = field.fieldOptions().requireFieldMatch();
        FieldQuery fieldQuery = requireFieldMatch ? sharedEntry.fieldMatchFieldQuery : sharedEntry.noFieldMatchFieldQuery;
        if (fieldQuery == null) {
            CustomFieldQuery rewritten = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, requireFieldMatch);
            if (requireFieldMatch) {
                sharedEntry.fieldMatchFieldQuery = rewritten;
            } else {
                sharedEntry.noFieldMatchFieldQuery = rewritten;
            }
            fieldQuery = rewritten;
        }

        MapperHighlightEntry mapperEntry = sharedEntry.mappers.get(mapper);
        if (mapperEntry == null) {
            final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
            final boolean forceSource = context.highlight().forceSource(field);
            // Read from the stored field only when source is not forced and the field is stored.
            final boolean readFromStoredField = !forceSource && mapper.fieldType().stored();
            final int requestedFragments = field.fieldOptions().numberOfFragments();

            FragListBuilder fragListBuilder;
            boolean orderByScore;
            if (requestedFragments == 0) {
                // Zero fragments requested: a single fragment list, never score ordered.
                fragListBuilder = new SingleFragListBuilder();
                orderByScore = false;
            } else {
                int fragmentOffset = field.fieldOptions().fragmentOffset();
                if (fragmentOffset == -1) {
                    fragListBuilder = new SimpleFragListBuilder();
                } else {
                    fragListBuilder = new SimpleFragListBuilder(fragmentOffset);
                }
                orderByScore = field.fieldOptions().scoreOrdered();
            }

            final String[] preTags = field.fieldOptions().preTags();
            final String[] postTags = field.fieldOptions().postTags();
            BaseFragmentsBuilder fragmentsBuilder;
            if (orderByScore) {
                if (readFromStoredField) {
                    fragmentsBuilder = new ScoreOrderFragmentsBuilder(preTags, postTags, boundaryScanner);
                } else {
                    fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, preTags, postTags, boundaryScanner);
                }
            } else {
                if (readFromStoredField) {
                    fragmentsBuilder = new SimpleFragmentsBuilder(mapper, preTags, postTags, boundaryScanner);
                } else {
                    fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, preTags, postTags, boundaryScanner);
                }
            }
            fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);

            mapperEntry = new MapperHighlightEntry();
            mapperEntry.fragListBuilder = fragListBuilder;
            mapperEntry.fragmentsBuilder = fragmentsBuilder;
            if (sharedEntry.fvh == null) {
                // No constructor arguments are required for the Lucene highlighter:
                // the phrase and fieldMatch booleans are already set on the CustomFieldQuery,
                // and the fragment builders are always passed explicitly.
                sharedEntry.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
            }
            // NOTE(review): this is set after the field query above has been obtained - confirm
            // that this ordering is intended.
            CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
            sharedEntry.mappers.put(mapper, mapperEntry);
        }
        sharedEntry.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());

        // A HACK to make the highlighter do highlighting even though it is using the single frag
        // list builder: "0 fragments" is translated into unbounded count and size.
        final int configuredFragments = field.fieldOptions().numberOfFragments();
        final int numberOfFragments;
        final int fragmentCharSize;
        if (configuredFragments == 0) {
            numberOfFragments = Integer.MAX_VALUE;
            fragmentCharSize = Integer.MAX_VALUE;
        } else {
            numberOfFragments = configuredFragments;
            fragmentCharSize = field.fieldOptions().fragmentCharSize();
        }

        String[] fragments;
        // Only send matched fields if they were requested, to save time.
        if (field.fieldOptions().matchedFields() == null || field.fieldOptions().matchedFields().isEmpty()) {
            fragments = sharedEntry.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fragmentCharSize, numberOfFragments, mapperEntry.fragListBuilder, mapperEntry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
        } else {
            fragments = sharedEntry.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), field.fieldOptions().matchedFields(), fragmentCharSize, numberOfFragments, mapperEntry.fragListBuilder, mapperEntry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
        }
        if (fragments != null && fragments.length > 0) {
            return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
        }

        int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
        if (noMatchSize <= 0) {
            return null;
        }
        // Nothing matched: request a fragment covering 0..noMatchSize from the regular
        // fragments builder.
        FieldFragList fieldFragList = new SimpleFieldFragList(-1);
        fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
        fragments = mapperEntry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
        if (fragments == null || fragments.length == 0) {
            return null;
        }
        return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
    } catch (Exception e) {
        throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
}
Also used : SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) FragListBuilder(org.apache.lucene.search.vectorhighlight.FragListBuilder) SearchContext(org.elasticsearch.search.internal.SearchContext) BaseFragmentsBuilder(org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder) ScoreOrderFragmentsBuilder(org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) Encoder(org.apache.lucene.search.highlight.Encoder) FetchSubPhase(org.elasticsearch.search.fetch.FetchSubPhase) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) FieldQuery(org.apache.lucene.search.vectorhighlight.FieldQuery) Field(org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) BoundaryScanner(org.apache.lucene.search.vectorhighlight.BoundaryScanner) BreakIteratorBoundaryScanner(org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner) SimpleBoundaryScanner(org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner) FieldFragList(org.apache.lucene.search.vectorhighlight.FieldFragList) SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder) FieldMapper(org.elasticsearch.index.mapper.FieldMapper)

Example 2 with FieldQuery

use of org.apache.lucene.search.vectorhighlight.FieldQuery in project SearchServices by Alfresco.

The method doHighlighting of the class AlfrescoSolrHighlighter.

/**
 * Generates a list of highlighted query fragments for each item in a list
 * of documents, or returns null if highlighting is disabled or the schema
 * has no unique key field.
 *
 * <p>For every requested field the Alfresco property name is mapped to a
 * schema field. Depending on the field, highlighting is skipped (unknown or
 * Trie fields), done with the fast vector highlighter, or done with the
 * standard highlighter. When the standard highlighter yields nothing, a
 * second mapping of the property is tried, and finally the alternate field
 * is used.
 *
 * @param docs
 *            query results
 * @param query
 *            the query
 * @param req
 *            the current request
 * @param defaultFields
 *            default list of fields to summarize
 *
 * @return NamedList containing a NamedList for each document, which in
 *         turn contains (field, summary) pairs; null when highlighting is
 *         disabled or no unique key field exists.
 * @throws IOException
 *             propagated from the index access performed while rewriting
 *             the query, loading documents or highlighting
 */
@Override
@SuppressWarnings("unchecked")
public NamedList<Object> doHighlighting(DocList docs, Query query, SolrQueryRequest req, String[] defaultFields) throws IOException {
    SolrParams params = req.getParams();
    if (!isHighlightingEnabled(params)) {
        return null;
    }
    SolrIndexSearcher searcher = req.getSearcher();
    // The query is rewritten up front unless both the phrase highlighter and
    // multi-term highlighting are enabled.
    boolean rewrite = query != null && !(Boolean.valueOf(params.get(HighlightParams.USE_PHRASE_HIGHLIGHTER, "true")) && Boolean.valueOf(params.get(HighlightParams.HIGHLIGHT_MULTI_TERM, "true")));
    if (rewrite) {
        query = query.rewrite(searcher.getIndexReader());
    }
    IndexSchema schema = searcher.getSchema();
    // Fetch the unique key if one exists.
    SchemaField keyField = schema.getUniqueKeyField();
    if (keyField == null) {
        // Exit early; a unique key field is needed to populate the response.
        return null;
    }
    String[] fieldNames = getHighlightFields(query, req, defaultFields);
    Set<String> preFetchFieldNames = getDocPrefetchFieldNames(fieldNames, req);
    if (preFetchFieldNames != null) {
        preFetchFieldNames.add(keyField.getName());
    }
    // Both are created lazily, the first time a field needs the fast vector highlighter.
    FastVectorHighlighter fvh = null;
    FieldQuery fvhFieldQuery = null;
    IndexReader reader = new TermVectorReusingLeafReader(searcher.getSlowAtomicReader());
    // Highlight each document
    NamedList<Object> fragments = new SimpleOrderedMap<>();
    DocIterator iterator = docs.iterator();
    for (int i = 0; i < docs.size(); i++) {
        int docId = iterator.nextDoc();
        Document doc = getDocument(searcher.doc(docId, preFetchFieldNames), req);
        NamedList<Object> docHighlights = new SimpleOrderedMap<>();
        // Highlight per-field
        for (String fieldName : fieldNames) {
            String schemaFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName, FieldUse.HIGHLIGHT, req);
            SchemaField schemaField = schema.getFieldOrNull(schemaFieldName);
            // Rewrite field specific parameters for the mapped schema field.
            rewriteRequestParameters(params, fieldName, schemaFieldName, req);
            // Object type allows flexibility for subclassers.
            Object fieldHighlights;
            if (schemaField == null) {
                fieldHighlights = null;
            } else if (schemaField.getType() instanceof org.apache.solr.schema.TrieField) {
                // TODO: highlighting numeric fields is broken (Lucene) - so
                // we disable them until fixed (see LUCENE-3080)!
                fieldHighlights = null;
            } else if (useFastVectorHighlighter(req.getParams(), schemaField)) {
                if (fvhFieldQuery == null) {
                    fvh = new FastVectorHighlighter(
                            req.getParams().getBool(HighlightParams.USE_PHRASE_HIGHLIGHTER, true),
                            req.getParams().getBool(HighlightParams.FIELD_MATCH, false));
                    fvh.setPhraseLimit(req.getParams().getInt(HighlightParams.PHRASE_LIMIT, SolrHighlighter.DEFAULT_PHRASE_LIMIT));
                    fvhFieldQuery = fvh.getFieldQuery(query, reader);
                }
                FvhContainer fvhContainer = new FvhContainer(fvh, fvhFieldQuery);
                fieldHighlights = doHighlightingByFastVectorHighlighter(doc, docId, schemaField, fvhContainer, reader, req);
            } else {
                // standard/default highlighter
                fieldHighlights = doHighlightingByHighlighter(doc, docId, schemaField, query, reader, req);
                // Fall back to the best FTS field if highlight fails
                if (fieldHighlights == null) {
                    String fallbackFieldName = AlfrescoSolrDataModel.getInstance().mapProperty(fieldName, FieldUse.HIGHLIGHT, req, 1);
                    SchemaField fallbackField = schema.getFieldOrNull(fallbackFieldName);
                    // Check the freshly looked-up fallback field: it may be missing from the
                    // schema, and must not be handed to the highlighter as null.
                    if (fallbackField != null) {
                        rewriteRequestParameters(params, fieldName, fallbackFieldName, req);
                        fieldHighlights = doHighlightingByHighlighter(doc, docId, fallbackField, query, reader, req);
                    }
                }
            }
            if (fieldHighlights == null) {
                // no summaries made; copy text from alternate field
                fieldHighlights = alternateField(doc, fieldName, req);
            }
            if (fieldHighlights != null) {
                docHighlights.add(fieldName, fieldHighlights);
            }
        }
        // Expose the DBID value alongside the highlights when the document has one.
        if (doc.get("DBID") != null) {
            docHighlights.add("DBID", doc.get("DBID"));
        }
        fragments.add(schema.printableUniqueKey(doc), docHighlights);
    }
    return fragments;
}
Also used : FieldQuery(org.apache.lucene.search.vectorhighlight.FieldQuery) DocIterator(org.apache.solr.search.DocIterator) NamedList(org.apache.solr.common.util.NamedList) FastVectorHighlighter(org.apache.lucene.search.vectorhighlight.FastVectorHighlighter) SolrIndexSearcher(org.apache.solr.search.SolrIndexSearcher) Document(org.apache.lucene.document.Document) SolrInputDocument(org.apache.solr.common.SolrInputDocument) SimpleOrderedMap(org.apache.solr.common.util.SimpleOrderedMap) SchemaField(org.apache.solr.schema.SchemaField) IndexReader(org.apache.lucene.index.IndexReader) SolrParams(org.apache.solr.common.params.SolrParams) ModifiableSolrParams(org.apache.solr.common.params.ModifiableSolrParams) IndexSchema(org.apache.solr.schema.IndexSchema)

Aggregations

FieldQuery (org.apache.lucene.search.vectorhighlight.FieldQuery)2 Document (org.apache.lucene.document.Document)1 IndexReader (org.apache.lucene.index.IndexReader)1 Encoder (org.apache.lucene.search.highlight.Encoder)1 BaseFragmentsBuilder (org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder)1 BoundaryScanner (org.apache.lucene.search.vectorhighlight.BoundaryScanner)1 BreakIteratorBoundaryScanner (org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner)1 CustomFieldQuery (org.apache.lucene.search.vectorhighlight.CustomFieldQuery)1 FastVectorHighlighter (org.apache.lucene.search.vectorhighlight.FastVectorHighlighter)1 FieldFragList (org.apache.lucene.search.vectorhighlight.FieldFragList)1 FragListBuilder (org.apache.lucene.search.vectorhighlight.FragListBuilder)1 ScoreOrderFragmentsBuilder (org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder)1 SimpleBoundaryScanner (org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner)1 SimpleFieldFragList (org.apache.lucene.search.vectorhighlight.SimpleFieldFragList)1 SimpleFragListBuilder (org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder)1 SingleFragListBuilder (org.apache.lucene.search.vectorhighlight.SingleFragListBuilder)1 SolrInputDocument (org.apache.solr.common.SolrInputDocument)1 ModifiableSolrParams (org.apache.solr.common.params.ModifiableSolrParams)1 SolrParams (org.apache.solr.common.params.SolrParams)1 NamedList (org.apache.solr.common.util.NamedList)1