Search in sources :

Example 1 with Text

use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.

the class HighlightField method fromXContent.

public static HighlightField fromXContent(XContentParser parser) throws IOException {
    ensureExpectedToken(XContentParser.Token.FIELD_NAME, parser.currentToken(), parser::getTokenLocation);
    String fieldName = parser.currentName();
    Text[] fragments = null;
    XContentParser.Token token = parser.nextToken();
    if (token == XContentParser.Token.START_ARRAY) {
        List<Text> values = new ArrayList<>();
        while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
            values.add(new Text(parser.text()));
        }
        fragments = values.toArray(new Text[values.size()]);
    } else if (token == XContentParser.Token.VALUE_NULL) {
        fragments = null;
    } else {
        throw new ParsingException(parser.getTokenLocation(), "unexpected token type [" + token + "]");
    }
    return new HighlightField(fieldName, fragments);
}
Also used : ParsingException(org.elasticsearch.common.ParsingException) ArrayList(java.util.ArrayList) Text(org.elasticsearch.common.text.Text) XContentParser(org.elasticsearch.common.xcontent.XContentParser)

Example 2 with Text

use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.

the class HighlightField method toXContent.

@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    builder.field(name);
    if (fragments == null) {
        builder.nullValue();
    } else {
        builder.startArray();
        for (Text fragment : fragments) {
            builder.value(fragment);
        }
        builder.endArray();
    }
    return builder;
}
Also used : Text(org.elasticsearch.common.text.Text)

Example 3 with Text

use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.

the class PlainHighlighter method highlight.

@Override
public HighlightField highlight(HighlighterContext highlighterContext) {
    SearchContextHighlight.Field field = highlighterContext.field;
    SearchContext context = highlighterContext.context;
    FetchSubPhase.HitContext hitContext = highlighterContext.hitContext;
    FieldMapper mapper = highlighterContext.mapper;
    Encoder encoder = field.fieldOptions().encoder().equals("html") ? HighlightUtils.Encoders.HTML : HighlightUtils.Encoders.DEFAULT;
    if (!hitContext.cache().containsKey(CACHE_KEY)) {
        Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> mappers = new HashMap<>();
        hitContext.cache().put(CACHE_KEY, mappers);
    }
    @SuppressWarnings("unchecked") Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter> cache = (Map<FieldMapper, org.apache.lucene.search.highlight.Highlighter>) hitContext.cache().get(CACHE_KEY);
    org.apache.lucene.search.highlight.Highlighter entry = cache.get(mapper);
    if (entry == null) {
        QueryScorer queryScorer = new CustomQueryScorer(highlighterContext.query, field.fieldOptions().requireFieldMatch() ? mapper.fieldType().name() : null);
        queryScorer.setExpandMultiTermQuery(true);
        Fragmenter fragmenter;
        if (field.fieldOptions().numberOfFragments() == 0) {
            fragmenter = new NullFragmenter();
        } else if (field.fieldOptions().fragmenter() == null) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else if ("simple".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleFragmenter(field.fieldOptions().fragmentCharSize());
        } else if ("span".equals(field.fieldOptions().fragmenter())) {
            fragmenter = new SimpleSpanFragmenter(queryScorer, field.fieldOptions().fragmentCharSize());
        } else {
            throw new IllegalArgumentException("unknown fragmenter option [" + field.fieldOptions().fragmenter() + "] for the field [" + highlighterContext.fieldName + "]");
        }
        Formatter formatter = new SimpleHTMLFormatter(field.fieldOptions().preTags()[0], field.fieldOptions().postTags()[0]);
        entry = new org.apache.lucene.search.highlight.Highlighter(formatter, encoder, queryScorer);
        entry.setTextFragmenter(fragmenter);
        // always highlight across all data
        entry.setMaxDocCharsToAnalyze(Integer.MAX_VALUE);
        cache.put(mapper, entry);
    }
    // a HACK to make highlighter do highlighting, even though its using the single frag list builder
    int numberOfFragments = field.fieldOptions().numberOfFragments() == 0 ? 1 : field.fieldOptions().numberOfFragments();
    ArrayList<TextFragment> fragsList = new ArrayList<>();
    List<Object> textsToHighlight;
    Analyzer analyzer = context.mapperService().documentMapper(hitContext.hit().getType()).mappers().indexAnalyzer();
    try {
        textsToHighlight = HighlightUtils.loadFieldValues(field, mapper, context, hitContext);
        for (Object textToHighlight : textsToHighlight) {
            String text;
            if (textToHighlight instanceof BytesRef) {
                text = mapper.fieldType().valueForDisplay(textToHighlight).toString();
            } else {
                text = textToHighlight.toString();
            }
            try (TokenStream tokenStream = analyzer.tokenStream(mapper.fieldType().name(), text)) {
                if (!tokenStream.hasAttribute(CharTermAttribute.class) || !tokenStream.hasAttribute(OffsetAttribute.class)) {
                    // can't perform highlighting if the stream has no terms (binary token stream) or no offsets
                    continue;
                }
                TextFragment[] bestTextFragments = entry.getBestTextFragments(tokenStream, text, false, numberOfFragments);
                for (TextFragment bestTextFragment : bestTextFragments) {
                    if (bestTextFragment != null && bestTextFragment.getScore() > 0) {
                        fragsList.add(bestTextFragment);
                    }
                }
            }
        }
    } catch (Exception e) {
        if (ExceptionsHelper.unwrap(e, BytesRefHash.MaxBytesLengthExceededException.class) != null) {
            // the plain highlighter will parse the source and try to analyze it.
            return null;
        } else {
            throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        }
    }
    if (field.fieldOptions().scoreOrdered()) {
        CollectionUtil.introSort(fragsList, new Comparator<TextFragment>() {

            @Override
            public int compare(TextFragment o1, TextFragment o2) {
                return Math.round(o2.getScore() - o1.getScore());
            }
        });
    }
    String[] fragments;
    // number_of_fragments is set to 0 but we have a multivalued field
    if (field.fieldOptions().numberOfFragments() == 0 && textsToHighlight.size() > 1 && fragsList.size() > 0) {
        fragments = new String[fragsList.size()];
        for (int i = 0; i < fragsList.size(); i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    } else {
        // refine numberOfFragments if needed
        numberOfFragments = fragsList.size() < numberOfFragments ? fragsList.size() : numberOfFragments;
        fragments = new String[numberOfFragments];
        for (int i = 0; i < fragments.length; i++) {
            fragments[i] = fragsList.get(i).toString();
        }
    }
    if (fragments.length > 0) {
        return new HighlightField(highlighterContext.fieldName, Text.convertFromStringArray(fragments));
    }
    int noMatchSize = highlighterContext.field.fieldOptions().noMatchSize();
    if (noMatchSize > 0 && textsToHighlight.size() > 0) {
        // Pull an excerpt from the beginning of the string but make sure to split the string on a term boundary.
        String fieldContents = textsToHighlight.get(0).toString();
        int end;
        try {
            end = findGoodEndForNoHighlightExcerpt(noMatchSize, analyzer, mapper.fieldType().name(), fieldContents);
        } catch (Exception e) {
            throw new FetchPhaseExecutionException(context, "Failed to highlight field [" + highlighterContext.fieldName + "]", e);
        }
        if (end > 0) {
            return new HighlightField(highlighterContext.fieldName, new Text[] { new Text(fieldContents.substring(0, end)) });
        }
    }
    return null;
}
Also used : TokenStream(org.apache.lucene.analysis.TokenStream) HashMap(java.util.HashMap) Formatter(org.apache.lucene.search.highlight.Formatter) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) ArrayList(java.util.ArrayList) SearchContext(org.elasticsearch.search.internal.SearchContext) TextFragment(org.apache.lucene.search.highlight.TextFragment) Analyzer(org.apache.lucene.analysis.Analyzer) SimpleFragmenter(org.apache.lucene.search.highlight.SimpleFragmenter) Encoder(org.apache.lucene.search.highlight.Encoder) SimpleFragmenter(org.apache.lucene.search.highlight.SimpleFragmenter) Fragmenter(org.apache.lucene.search.highlight.Fragmenter) SimpleSpanFragmenter(org.apache.lucene.search.highlight.SimpleSpanFragmenter) NullFragmenter(org.apache.lucene.search.highlight.NullFragmenter) FetchSubPhase(org.elasticsearch.search.fetch.FetchSubPhase) BytesRefHash(org.apache.lucene.util.BytesRefHash) BytesRef(org.apache.lucene.util.BytesRef) SimpleSpanFragmenter(org.apache.lucene.search.highlight.SimpleSpanFragmenter) QueryScorer(org.apache.lucene.search.highlight.QueryScorer) Text(org.elasticsearch.common.text.Text) NullFragmenter(org.apache.lucene.search.highlight.NullFragmenter) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) IOException(java.io.IOException) FetchPhaseExecutionException(org.elasticsearch.search.fetch.FetchPhaseExecutionException) SimpleHTMLFormatter(org.apache.lucene.search.highlight.SimpleHTMLFormatter) FieldMapper(org.elasticsearch.index.mapper.FieldMapper) HashMap(java.util.HashMap) Map(java.util.Map)

Example 4 with Text

use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.

the class FetchPhase method createSearchHit.

private SearchHit createSearchHit(SearchContext context, FieldsVisitor fieldsVisitor, int docId, int subDocId, LeafReaderContext subReaderContext) {
    if (fieldsVisitor == null) {
        return new SearchHit(docId);
    }
    loadStoredFields(context, subReaderContext, fieldsVisitor, subDocId);
    fieldsVisitor.postProcess(context.mapperService());
    Map<String, SearchHitField> searchFields = null;
    if (!fieldsVisitor.fields().isEmpty()) {
        searchFields = new HashMap<>(fieldsVisitor.fields().size());
        for (Map.Entry<String, List<Object>> entry : fieldsVisitor.fields().entrySet()) {
            searchFields.put(entry.getKey(), new SearchHitField(entry.getKey(), entry.getValue()));
        }
    }
    DocumentMapper documentMapper = context.mapperService().documentMapper(fieldsVisitor.uid().type());
    Text typeText;
    if (documentMapper == null) {
        typeText = new Text(fieldsVisitor.uid().type());
    } else {
        typeText = documentMapper.typeText();
    }
    SearchHit searchHit = new SearchHit(docId, fieldsVisitor.uid().id(), typeText, searchFields);
    // Set _source if requested.
    SourceLookup sourceLookup = context.lookup().source();
    sourceLookup.setSegmentAndDocument(subReaderContext, subDocId);
    if (fieldsVisitor.source() != null) {
        sourceLookup.setSource(fieldsVisitor.source());
    }
    return searchHit;
}
Also used : SourceLookup(org.elasticsearch.search.lookup.SourceLookup) SearchHit(org.elasticsearch.search.SearchHit) DocumentMapper(org.elasticsearch.index.mapper.DocumentMapper) SearchHitField(org.elasticsearch.search.SearchHitField) ArrayList(java.util.ArrayList) List(java.util.List) Text(org.elasticsearch.common.text.Text) HashMap(java.util.HashMap) Map(java.util.Map)

Example 5 with Text

use of org.elasticsearch.common.text.Text in project elasticsearch by elastic.

the class TermSuggester method innerExecute.

@Override
public TermSuggestion innerExecute(String name, TermSuggestionContext suggestion, IndexSearcher searcher, CharsRefBuilder spare) throws IOException {
    DirectSpellChecker directSpellChecker = suggestion.getDirectSpellCheckerSettings().createDirectSpellChecker();
    final IndexReader indexReader = searcher.getIndexReader();
    TermSuggestion response = new TermSuggestion(name, suggestion.getSize(), suggestion.getDirectSpellCheckerSettings().sort());
    List<Token> tokens = queryTerms(suggestion, spare);
    for (Token token : tokens) {
        // TODO: Extend DirectSpellChecker in 4.1, to get the raw suggested words as BytesRef
        SuggestWord[] suggestedWords = directSpellChecker.suggestSimilar(token.term, suggestion.getShardSize(), indexReader, suggestion.getDirectSpellCheckerSettings().suggestMode());
        Text key = new Text(new BytesArray(token.term.bytes()));
        TermSuggestion.Entry resultEntry = new TermSuggestion.Entry(key, token.startOffset, token.endOffset - token.startOffset);
        for (SuggestWord suggestWord : suggestedWords) {
            Text word = new Text(suggestWord.string);
            resultEntry.addOption(new TermSuggestion.Entry.Option(word, suggestWord.freq, suggestWord.score));
        }
        response.addTerm(resultEntry);
    }
    return response;
}
Also used : BytesArray(org.elasticsearch.common.bytes.BytesArray) IndexReader(org.apache.lucene.index.IndexReader) SuggestWord(org.apache.lucene.search.spell.SuggestWord) Text(org.elasticsearch.common.text.Text) DirectSpellChecker(org.apache.lucene.search.spell.DirectSpellChecker)

Aggregations

Text (org.elasticsearch.common.text.Text)50 SearchHit (org.elasticsearch.search.SearchHit)13 ArrayList (java.util.ArrayList)12 HashMap (java.util.HashMap)12 Map (java.util.Map)10 SearchHits (org.elasticsearch.search.SearchHits)10 IOException (java.io.IOException)9 BytesReference (org.elasticsearch.common.bytes.BytesReference)9 BytesArray (org.elasticsearch.common.bytes.BytesArray)7 CompletionSuggestion (org.elasticsearch.search.suggest.completion.CompletionSuggestion)7 List (java.util.List)5 SearchResponse (org.elasticsearch.action.search.SearchResponse)5 SearchHitField (org.elasticsearch.search.SearchHitField)5 HighlightField (org.elasticsearch.search.fetch.subphase.highlight.HighlightField)5 Option (org.elasticsearch.search.suggest.Suggest.Suggestion.Entry.Option)5 AtomicReference (java.util.concurrent.atomic.AtomicReference)4 BytesRef (org.apache.lucene.util.BytesRef)4 XContentBuilder (org.elasticsearch.common.xcontent.XContentBuilder)4 InternalSearchResponse (org.elasticsearch.search.internal.InternalSearchResponse)4 Entry (org.elasticsearch.search.suggest.Suggest.Suggestion.Entry)4