Search in sources :

Example 1 with CustomFieldQuery

use of org.apache.lucene.search.vectorhighlight.CustomFieldQuery in project elasticsearch by elastic.

the class FastVectorHighlighter method highlight.

@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper mapper = highlighterContext.mapper;
    if (canHighlight(mapper) == false) {
        throw new IllegalArgumentException("the field [" + highlighterContext.fieldName + "] should be indexed with term vector with position offsets to be used with fast vector highlighter");
    }
    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        hitContext.cache().put(CACHE_KEY, new HighlighterEntry());
    }
    HighlighterEntry cache = (HighlighterEntry) hitContext.cache().get(CACHE_KEY);
    try {
        FieldQuery fieldQuery;
        if (field.fieldOptions().requireFieldMatch()) {
            if (cache.fieldMatchFieldQuery == null) {
                /*
                     * we use top level reader to rewrite the query against all readers,
                     * with use caching it across hits (and across readers...)
                     */
                cache.fieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
            }
            fieldQuery = cache.fieldMatchFieldQuery;
        } else {
            if (cache.noFieldMatchFieldQuery == null) {
                /*
                     * we use top level reader to rewrite the query against all readers,
                     * with use caching it across hits (and across readers...)
                     */
                cache.noFieldMatchFieldQuery = new CustomFieldQuery(highlighterContext.query, hitContext.topLevelReader(), true, field.fieldOptions().requireFieldMatch());
            }
            fieldQuery = cache.noFieldMatchFieldQuery;
        }
        MapperHighlightEntry entry = cache.mappers.get(mapper);
        if (entry == null) {
            FragListBuilder fragListBuilder;
            BaseFragmentsBuilder fragmentsBuilder;
            final BoundaryScanner boundaryScanner = getBoundaryScanner(field);
            boolean forceSource = context.highlight().forceSource(field);
            if (field.fieldOptions().numberOfFragments() == 0) {
                fragListBuilder = new SingleFragListBuilder();
                if (!forceSource && mapper.fieldType().stored()) {
                    fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
                } else {
                    fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
                }
            } else {
                fragListBuilder = field.fieldOptions().fragmentOffset() == -1 ? new SimpleFragListBuilder() : new SimpleFragListBuilder(field.fieldOptions().fragmentOffset());
                if (field.fieldOptions().scoreOrdered()) {
                    if (!forceSource && mapper.fieldType().stored()) {
                        fragmentsBuilder = new ScoreOrderFragmentsBuilder(field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
                    } else {
                        fragmentsBuilder = new SourceScoreOrderFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
                    }
                } else {
                    if (!forceSource && mapper.fieldType().stored()) {
                        fragmentsBuilder = new SimpleFragmentsBuilder(mapper, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
                    } else {
                        fragmentsBuilder = new SourceSimpleFragmentsBuilder(mapper, context, field.fieldOptions().preTags(), field.fieldOptions().postTags(), boundaryScanner);
                    }
                }
            }
            fragmentsBuilder.setDiscreteMultiValueHighlighting(termVectorMultiValue);
            entry = new MapperHighlightEntry();
            entry.fragListBuilder = fragListBuilder;
            entry.fragmentsBuilder = fragmentsBuilder;
            if (cache.fvh == null) {
                // parameters to FVH are not requires since:
                // first two booleans are not relevant since they are set on the CustomFieldQuery
                // (phrase and fieldMatch) fragment builders are used explicitly
                cache.fvh = new org.apache.lucene.search.vectorhighlight.FastVectorHighlighter();
            }
            CustomFieldQuery.highlightFilters.set(field.fieldOptions().highlightFilter());
            cache.mappers.put(mapper, entry);
        }
        cache.fvh.setPhraseLimit(field.fieldOptions().phraseLimit());
        String[] fragments;
        // a HACK to make highlighter do highlighting, even though its using the single frag list builder
        int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().numberOfFragments();
        int fragmentCharSize = field.fieldOptions().numberOfFragments() == 0 ? Integer.MAX_VALUE : field.fieldOptions().fragmentCharSize();
        // Only send matched fields if they were requested to save time.
        if (field.fieldOptions().matchedFields() != null && !field.fieldOptions().matchedFields().isEmpty()) {
            fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), field.fieldOptions().matchedFields(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
        } else {
            fragments = cache.fvh.getBestFragments(fieldQuery, hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fragmentCharSize, numberOfFragments, entry.fragListBuilder, entry.fragmentsBuilder, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
        }
        if (fragments != null && fragments.length > 0) {
            return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
        }
        int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
        if (noMatchSize > 0) {
            // Essentially we just request that a fragment is built from 0 to noMatchSize using
            // the normal fragmentsBuilder
            FieldFragList fieldFragList = new SimpleFieldFragList(-1);
            fieldFragList.add(0, noMatchSize, Collections.<WeightedPhraseInfo>emptyList());
            fragments = entry.fragmentsBuilder.createFragments(hitContext.reader(), hitContext.docId(), mapper.fieldType().name(), fieldFragList, 1, field.fieldOptions().preTags(), field.fieldOptions().postTags(), encoder);
            if (fragments != null && fragments.length > 0) {
                return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
            }
        }
        return null;
    } catch (Exception e) {
        throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
    }
}
Also used : SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) FragListBuilder(org.apache.lucene.search.vectorhighlight.FragListBuilder) SearchContext(org.elasticsearch.search.internal.SearchContext) BaseFragmentsBuilder(org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder) ScoreOrderFragmentsBuilder(org.apache.lucene.search.vectorhighlight.ScoreOrderFragmentsBuilder) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) Encoder(org.apache.lucene.search.highlight.Encoder) FetchSubPhase(org.elasticsearch.search.fetch.FetchSubPhase) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) FieldQuery(org.apache.lucene.search.vectorhighlight.FieldQuery) Field(org.elasticsearch.search.fetch.subphase.highlight.SearchContextHighlight.Field) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) SimpleFragListBuilder(org.apache.lucene.search.vectorhighlight.SimpleFragListBuilder) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) BoundaryScanner(org.apache.lucene.search.vectorhighlight.BoundaryScanner) BreakIteratorBoundaryScanner(org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner) SimpleBoundaryScanner(org.apache.lucene.search.vectorhighlight.SimpleBoundaryScanner) FieldFragList(org.apache.lucene.search.vectorhighlight.FieldFragList) SimpleFieldFragList(org.apache.lucene.search.vectorhighlight.SimpleFieldFragList) SingleFragListBuilder(org.apache.lucene.search.vectorhighlight.SingleFragListBuilder) FieldMapper(org.elasticsearch.index.mapper.FieldMapper)

Example 2 with CustomFieldQuery

use of org.apache.lucene.search.vectorhighlight.CustomFieldQuery in project elasticsearch by elastic.

the class VectorHighlighterTests method testVectorHighlighterPrefixQuery.

public void testVectorHighlighterPrefixQuery() throws Exception {
    Directory dir = new RAMDirectory();
    IndexWriter indexWriter = new IndexWriter(dir, new IndexWriterConfig(Lucene.STANDARD_ANALYZER));
    Document document = new Document();
    document.add(new TextField("_id", "1", Field.Store.YES));
    FieldType vectorsType = new FieldType(TextField.TYPE_STORED);
    vectorsType.setStoreTermVectors(true);
    vectorsType.setStoreTermVectorPositions(true);
    vectorsType.setStoreTermVectorOffsets(true);
    document.add(new Field("content", "the big bad dog", vectorsType));
    indexWriter.addDocument(document);
    IndexReader reader = DirectoryReader.open(indexWriter);
    IndexSearcher searcher = new IndexSearcher(reader);
    TopDocs topDocs = searcher.search(new TermQuery(new Term("_id", "1")), 1);
    assertThat(topDocs.totalHits, equalTo(1));
    FastVectorHighlighter highlighter = new FastVectorHighlighter();
    PrefixQuery prefixQuery = new PrefixQuery(new Term("content", "ba"));
    assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_REWRITE.getClass().getName()));
    String fragment = highlighter.getBestFragment(highlighter.getFieldQuery(prefixQuery), reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, nullValue());
    prefixQuery.setRewriteMethod(PrefixQuery.SCORING_BOOLEAN_REWRITE);
    Query rewriteQuery = prefixQuery.rewrite(reader);
    fragment = highlighter.getBestFragment(highlighter.getFieldQuery(rewriteQuery), reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
    // now check with the custom field query
    prefixQuery = new PrefixQuery(new Term("content", "ba"));
    assertThat(prefixQuery.getRewriteMethod().getClass().getName(), equalTo(PrefixQuery.CONSTANT_SCORE_REWRITE.getClass().getName()));
    fragment = highlighter.getBestFragment(new CustomFieldQuery(prefixQuery, reader, highlighter), reader, topDocs.scoreDocs[0].doc, "content", 30);
    assertThat(fragment, notNullValue());
}
Also used : IndexSearcher(org.apache.lucene.search.IndexSearcher) TermQuery(org.apache.lucene.search.TermQuery) Query(org.apache.lucene.search.Query) PrefixQuery(org.apache.lucene.search.PrefixQuery) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) TermQuery(org.apache.lucene.search.TermQuery) FastVectorHighlighter(org.apache.lucene.search.vectorhighlight.FastVectorHighlighter) Term(org.apache.lucene.index.Term) Document(org.apache.lucene.document.Document) RAMDirectory(org.apache.lucene.store.RAMDirectory) FieldType(org.apache.lucene.document.FieldType) TopDocs(org.apache.lucene.search.TopDocs) Field(org.apache.lucene.document.Field) TextField(org.apache.lucene.document.TextField) IndexWriter(org.apache.lucene.index.IndexWriter) PrefixQuery(org.apache.lucene.search.PrefixQuery) CustomFieldQuery(org.apache.lucene.search.vectorhighlight.CustomFieldQuery) IndexReader(org.apache.lucene.index.IndexReader) TextField(org.apache.lucene.document.TextField) RAMDirectory(org.apache.lucene.store.RAMDirectory) Directory(org.apache.lucene.store.Directory) IndexWriterConfig(org.apache.lucene.index.IndexWriterConfig)

Aggregations

CustomFieldQuery (org.apache.lucene.search.vectorhighlight.CustomFieldQuery)2 Document (org.apache.lucene.document.Document)1 Field (org.apache.lucene.document.Field)1 FieldType (org.apache.lucene.document.FieldType)1 TextField (org.apache.lucene.document.TextField)1 IndexReader (org.apache.lucene.index.IndexReader)1 IndexWriter (org.apache.lucene.index.IndexWriter)1 IndexWriterConfig (org.apache.lucene.index.IndexWriterConfig)1 Term (org.apache.lucene.index.Term)1 IndexSearcher (org.apache.lucene.search.IndexSearcher)1 PrefixQuery (org.apache.lucene.search.PrefixQuery)1 Query (org.apache.lucene.search.Query)1 TermQuery (org.apache.lucene.search.TermQuery)1 TopDocs (org.apache.lucene.search.TopDocs)1 Encoder (org.apache.lucene.search.highlight.Encoder)1 BaseFragmentsBuilder (org.apache.lucene.search.vectorhighlight.BaseFragmentsBuilder)1 BoundaryScanner (org.apache.lucene.search.vectorhighlight.BoundaryScanner)1 BreakIteratorBoundaryScanner (org.apache.lucene.search.vectorhighlight.BreakIteratorBoundaryScanner)1 FastVectorHighlighter (org.apache.lucene.search.vectorhighlight.FastVectorHighlighter)1 FieldFragList (org.apache.lucene.search.vectorhighlight.FieldFragList)1