Search in sources :

Example 11 with HighlightBuilder

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder in project graylog2-server by Graylog2.

the class ESMessageList method applyHighlightingIfActivated.

private void applyHighlightingIfActivated(SearchSourceBuilder searchSourceBuilder, SearchJob job, Query query) {
    if (!allowHighlighting) {
        return;
    }
    final QueryStringQueryBuilder highlightQuery = decoratedHighlightQuery(job, query);
    searchSourceBuilder.highlighter(new HighlightBuilder().requireFieldMatch(false).highlightQuery(highlightQuery).field("*").fragmentSize(0).numOfFragments(0));
}
Also used : QueryStringQueryBuilder(org.graylog.shaded.elasticsearch6.org.elasticsearch.index.query.QueryStringQueryBuilder) HighlightBuilder(org.graylog.shaded.elasticsearch6.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)

Example 12 with HighlightBuilder

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder in project yacy_grid_mcp by yacy.

the class GSASearchService method serviceImpl.

@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    // query Attributes:
    // for original GSA query attributes, see https://www.google.com/support/enterprise/static/gsa/docs/admin/74/gsa_doc_set/xml_reference/request_format.html#1082911
    String q = call.get("q", "");
    // in GSA: the maximum value of this parameter is 1000
    int num = call.get("num", call.get("rows", call.get("maximumRecords", 10)));
    // The index number of the results is 0-based
    int start = call.get("startRecord", call.get("start", 0));
    Classification.ContentDomain contentdom = Classification.ContentDomain.contentdomParser(call.get("contentdom", "all"));
    // important: call arguments may overrule parsed collection values if not empty. This can be used for authentified indexes!
    String site = call.get("site", call.get("collection", "").replace(',', '|'));
    String[] sites = site.length() == 0 ? new String[0] : site.split("\\|");
    int timezoneOffset = call.get("timezoneOffset", 0);
    boolean explain = call.get("explain", false);
    Sort sort = new Sort(call.get("sort", ""));
    String translatedQ = q;
    String daterange = call.get("daterange", "");
    if (daterange.length() > 0)
        translatedQ += " daterange:" + daterange;
    String as_filetype = call.get("as_filetype", "");
    // refers to as_filetype: only 'i' (include) or 'e' (exclude) allowed
    String as_ft = call.get("as_ft", "i");
    if (as_filetype.length() > 0)
        translatedQ += (as_ft.equals("i") ? " " : " -") + "filetype:" + as_filetype;
    String as_sitesearch = call.get("as_sitesearch", "");
    // refers to as_sitesearch: only 'i' (include) or 'e' (exclude) allowed
    String as_dt = call.get("as_dt", "i");
    if (as_sitesearch.length() > 0)
        translatedQ += (as_dt.equals("i") ? " " : " -") + "site:" + as_sitesearch;
    String queryXML = XML.escape(q);
    // prepare a query
    YaCyQuery yq = new YaCyQuery(translatedQ, sites, contentdom, timezoneOffset);
    ElasticsearchClient ec = Data.gridIndex.getElasticClient();
    HighlightBuilder hb = new HighlightBuilder().field(WebMapping.text_t.getMapping().name()).preTags("").postTags("").fragmentSize(140);
    ElasticsearchClient.Query query = ec.query("web", null, yq.queryBuilder, null, sort, hb, timezoneOffset, start, num, 0, explain);
    List<Map<String, Object>> result = query.results;
    List<String> explanations = query.explanations;
    // no xml encoder here on purpose, we will try to not have such things into our software in the future!
    StringBuffer sb = new StringBuffer(2048);
    sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n");
    // GSP
    sb.append("<GSP VER=\"3.2\">\n");
    sb.append("<!-- This is a Google Search Appliance API result, provided by YaCy Grid (see: https://github.com/yacy/yacy_grid_mcp). For the GSA protocol, see https://www.google.com/support/enterprise/static/gsa/docs/admin/74/gsa_doc_set/xml_reference/index.html -->\n");
    sb.append("<TM>0</TM>\n");
    sb.append("<Q>").append(queryXML).append("</Q>\n");
    sb.append("<PARAM name=\"output\" value=\"xml_no_dtd\" original_value=\"xml_no_dtd\"/>\n");
    sb.append("<PARAM name=\"ie\" value=\"UTF-8\" original_value=\"UTF-8\"/>\n");
    sb.append("<PARAM name=\"oe\" value=\"UTF-8\" original_value=\"UTF-8\"/>\n");
    sb.append("<PARAM name=\"q\" value=\"").append(queryXML).append("\" original_value=\"").append(queryXML).append("\"/>\n");
    sb.append("<PARAM name=\"start\" value=\"").append(Integer.toString(start)).append("\" original_value=\"").append(Integer.toString(start)).append("\"/>\n");
    sb.append("<PARAM name=\"num\" value=\"").append(Integer.toString(num)).append("\" original_value=\"").append(Integer.toString(num)).append("\"/>\n");
    sb.append("<PARAM name=\"site\" value=\"").append(XML.escape(site)).append("\" original_value=\"").append(XML.escape(site)).append("\"/>\n");
    // RES
    // SN; The index number (1-based) of this search result; EN: Indicates the index (1-based) of the last search result returned in this result set.
    sb.append("<RES SN=\"" + (start + 1) + "\" EN=\"" + (start + result.size()) + "\">\n");
    // this should show the estimated total number of results
    sb.append("<M>").append(Integer.toString(query.hitCount)).append("</M>\n");
    sb.append("<FI/>\n");
    // sb.append("<NB><NU>").append(getAPIPath()).append("?q=\"").append(queryXML).append("\"&amp;site=&amp;lr=&amp;ie=UTF-8&amp;oe=UTF-8&amp;output=xml_no_dtd&amp;client=&amp;access=&amp;sort=&amp;start=").append(Integer.toString(start)).append("&amp;num=").append(Integer.toString(num)).append("&amp;sa=N</NU></NB>\n");
    // List
    final AtomicInteger hit = new AtomicInteger(1);
    for (int hitc = 0; hitc < result.size(); hitc++) {
        WebDocument doc = new WebDocument(result.get(hitc));
        String titleXML = XML.escape(doc.getTitle());
        String link = doc.getLink();
        if (Classification.ContentDomain.IMAGE == contentdom)
            link = doc.pickImage((String) link);
        String linkXML = XML.escape(link.toString());
        String urlhash = Digest.encodeMD5Hex(link);
        String snippet = doc.getSnippet(query.highlights.get(hitc), yq);
        String snippetXML = XML.escape(snippet);
        Date last_modified_date = doc.getDate();
        int size = doc.getSize();
        int sizekb = size / 1024;
        int sizemb = sizekb / 1024;
        String size_string = sizemb > 0 ? (Integer.toString(sizemb) + " mbyte") : sizekb > 0 ? (Integer.toString(sizekb) + " kbyte") : (Integer.toString(size) + " byte");
        // String host = doc.getString(WebMapping.host_s, "");
        sb.append("<R N=\"").append(Integer.toString(hit.getAndIncrement())).append("\" MIME=\"text/html\">\n");
        sb.append("<T>").append(titleXML).append("</T>\n");
        sb.append("<FS NAME=\"date\" VALUE=\"").append(DateParser.formatGSAFS(last_modified_date)).append("\"/>\n");
        sb.append("<CRAWLDATE>").append(DateParser.formatRFC1123(last_modified_date)).append("</CRAWLDATE>\n");
        sb.append("<LANG>en</LANG>\n");
        sb.append("<U>").append(linkXML).append("</U>\n");
        sb.append("<UE>").append(linkXML).append("</UE>\n");
        sb.append("<S>").append(snippetXML).append("</S>\n");
        sb.append("<COLS>dht</COLS>\n");
        sb.append("<HAS><L/><C SZ=\"").append(size_string).append("\" CID=\"").append(urlhash).append("\" ENC=\"UTF-8\"/></HAS>\n");
        // sb.append("<ENT_SOURCE>yacy_v1.921_20170616_9248.tar.gz/amBzuRuUFyt6</ENT_SOURCE>\n");
        if (explain) {
            sb.append("<EXPLANATION><![CDATA[" + explanations.get(hitc) + "]]></EXPLANATION>\n");
        }
        sb.append("</R>\n");
    }
    ;
    // END RES GSP
    sb.append("</RES>\n");
    sb.append("</GSP>\n");
    return new ServiceResponse(sb.toString());
}
Also used : YaCyQuery(net.yacy.grid.io.index.YaCyQuery) Date(java.util.Date) ServiceResponse(net.yacy.grid.http.ServiceResponse) WebDocument(net.yacy.grid.io.index.WebDocument) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Classification(net.yacy.grid.tools.Classification) Sort(net.yacy.grid.io.index.Sort) ElasticsearchClient(net.yacy.grid.io.index.ElasticsearchClient) Map(java.util.Map) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)

Example 13 with HighlightBuilder

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder in project elasticsearch by elastic.

the class SearchService method parseSource.

private void parseSource(DefaultSearchContext context, SearchSourceBuilder source) throws SearchContextException {
    // nothing to parse...
    if (source == null) {
        return;
    }
    QueryShardContext queryShardContext = context.getQueryShardContext();
    context.from(source.from());
    context.size(source.size());
    Map<String, InnerHitBuilder> innerHitBuilders = new HashMap<>();
    if (source.query() != null) {
        InnerHitBuilder.extractInnerHits(source.query(), innerHitBuilders);
        context.parsedQuery(queryShardContext.toQuery(source.query()));
    }
    if (source.postFilter() != null) {
        InnerHitBuilder.extractInnerHits(source.postFilter(), innerHitBuilders);
        context.parsedPostFilter(queryShardContext.toQuery(source.postFilter()));
    }
    if (innerHitBuilders.size() > 0) {
        for (Map.Entry<String, InnerHitBuilder> entry : innerHitBuilders.entrySet()) {
            try {
                entry.getValue().build(context, context.innerHits());
            } catch (IOException e) {
                throw new SearchContextException(context, "failed to build inner_hits", e);
            }
        }
    }
    if (source.sorts() != null) {
        try {
            Optional<SortAndFormats> optionalSort = SortBuilder.buildSort(source.sorts(), context.getQueryShardContext());
            if (optionalSort.isPresent()) {
                context.sort(optionalSort.get());
            }
        } catch (IOException e) {
            throw new SearchContextException(context, "failed to create sort elements", e);
        }
    }
    context.trackScores(source.trackScores());
    if (source.minScore() != null) {
        context.minimumScore(source.minScore());
    }
    if (source.profile()) {
        context.setProfilers(new Profilers(context.searcher()));
    }
    if (source.timeout() != null) {
        context.timeout(source.timeout());
    }
    context.terminateAfter(source.terminateAfter());
    if (source.aggregations() != null) {
        try {
            AggregatorFactories factories = source.aggregations().build(context, null);
            factories.validate();
            context.aggregations(new SearchContextAggregations(factories));
        } catch (IOException e) {
            throw new AggregationInitializationException("Failed to create aggregators", e);
        }
    }
    if (source.suggest() != null) {
        try {
            context.suggest(source.suggest().build(queryShardContext));
        } catch (IOException e) {
            throw new SearchContextException(context, "failed to create SuggestionSearchContext", e);
        }
    }
    if (source.rescores() != null) {
        try {
            for (RescoreBuilder<?> rescore : source.rescores()) {
                context.addRescore(rescore.build(queryShardContext));
            }
        } catch (IOException e) {
            throw new SearchContextException(context, "failed to create RescoreSearchContext", e);
        }
    }
    if (source.explain() != null) {
        context.explain(source.explain());
    }
    if (source.fetchSource() != null) {
        context.fetchSourceContext(source.fetchSource());
    }
    if (source.docValueFields() != null) {
        context.docValueFieldsContext(new DocValueFieldsContext(source.docValueFields()));
    }
    if (source.highlighter() != null) {
        HighlightBuilder highlightBuilder = source.highlighter();
        try {
            context.highlight(highlightBuilder.build(queryShardContext));
        } catch (IOException e) {
            throw new SearchContextException(context, "failed to create SearchContextHighlighter", e);
        }
    }
    if (source.scriptFields() != null) {
        for (org.elasticsearch.search.builder.SearchSourceBuilder.ScriptField field : source.scriptFields()) {
            SearchScript searchScript = scriptService.search(context.lookup(), field.script(), ScriptContext.Standard.SEARCH);
            context.scriptFields().add(new ScriptField(field.fieldName(), searchScript, field.ignoreFailure()));
        }
    }
    if (source.ext() != null) {
        for (SearchExtBuilder searchExtBuilder : source.ext()) {
            context.addSearchExt(searchExtBuilder);
        }
    }
    if (source.version() != null) {
        context.version(source.version());
    }
    if (source.stats() != null) {
        context.groupStats(source.stats());
    }
    if (source.searchAfter() != null && source.searchAfter().length > 0) {
        if (context.scrollContext() != null) {
            throw new SearchContextException(context, "`search_after` cannot be used in a scroll context.");
        }
        if (context.from() > 0) {
            throw new SearchContextException(context, "`from` parameter must be set to 0 when `search_after` is used.");
        }
        FieldDoc fieldDoc = SearchAfterBuilder.buildFieldDoc(context.sort(), source.searchAfter());
        context.searchAfter(fieldDoc);
    }
    if (source.slice() != null) {
        if (context.scrollContext() == null) {
            throw new SearchContextException(context, "`slice` cannot be used outside of a scroll context");
        }
        context.sliceBuilder(source.slice());
    }
    if (source.storedFields() != null) {
        if (source.storedFields().fetchFields() == false) {
            if (context.version()) {
                throw new SearchContextException(context, "`stored_fields` cannot be disabled if version is requested");
            }
            if (context.sourceRequested()) {
                throw new SearchContextException(context, "`stored_fields` cannot be disabled if _source is requested");
            }
        }
        context.storedFieldsContext(source.storedFields());
    }
    if (source.collapse() != null) {
        final CollapseContext collapseContext = source.collapse().build(context);
        context.collapse(collapseContext);
    }
}
Also used : FieldDoc(org.apache.lucene.search.FieldDoc) HashMap(java.util.HashMap) InnerHitBuilder(org.elasticsearch.index.query.InnerHitBuilder) Profilers(org.elasticsearch.search.profile.Profilers) SearchSourceBuilder(org.elasticsearch.search.builder.SearchSourceBuilder) ScriptField(org.elasticsearch.search.fetch.subphase.ScriptFieldsContext.ScriptField) QueryShardContext(org.elasticsearch.index.query.QueryShardContext) AggregatorFactories(org.elasticsearch.search.aggregations.AggregatorFactories) DocValueFieldsContext(org.elasticsearch.search.fetch.subphase.DocValueFieldsContext) SearchContextAggregations(org.elasticsearch.search.aggregations.SearchContextAggregations) AggregationInitializationException(org.elasticsearch.search.aggregations.AggregationInitializationException) IOException(java.io.IOException) SortAndFormats(org.elasticsearch.search.sort.SortAndFormats) SearchScript(org.elasticsearch.script.SearchScript) Map(java.util.Map) HashMap(java.util.HashMap) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder) CollapseContext(org.elasticsearch.search.collapse.CollapseContext)

Example 14 with HighlightBuilder

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder in project elasticsearch by elastic.

the class TopHitsIT method testTopHitsInNested.

public void testTopHitsInNested() throws Exception {
    SearchResponse searchResponse = client().prepareSearch("articles").addAggregation(histogram("dates").field("date").interval(5).order(Histogram.Order.aggregation("to-comments", true)).subAggregation(nested("to-comments", "comments").subAggregation(topHits("comments").highlighter(new HighlightBuilder().field(new HighlightBuilder.Field("comments.message").highlightQuery(matchQuery("comments.message", "text")))).sort("comments.id", SortOrder.ASC)))).get();
    Histogram histogram = searchResponse.getAggregations().get("dates");
    for (int i = 0; i < numArticles; i += 5) {
        Histogram.Bucket bucket = histogram.getBuckets().get(i / 5);
        assertThat(bucket.getDocCount(), equalTo(5L));
        long numNestedDocs = 10 + (5 * i);
        Nested nested = bucket.getAggregations().get("to-comments");
        assertThat(nested.getDocCount(), equalTo(numNestedDocs));
        TopHits hits = nested.getAggregations().get("comments");
        SearchHits searchHits = hits.getHits();
        assertThat(searchHits.getTotalHits(), equalTo(numNestedDocs));
        for (int j = 0; j < 3; j++) {
            assertThat(searchHits.getAt(j).getNestedIdentity().getField().string(), equalTo("comments"));
            assertThat(searchHits.getAt(j).getNestedIdentity().getOffset(), equalTo(0));
            assertThat((Integer) searchHits.getAt(j).getSourceAsMap().get("id"), equalTo(0));
            HighlightField highlightField = searchHits.getAt(j).getHighlightFields().get("comments.message");
            assertThat(highlightField.getFragments().length, equalTo(1));
            assertThat(highlightField.getFragments()[0].string(), equalTo("some <em>text</em>"));
        }
    }
}
Also used : SearchHitField(org.elasticsearch.search.SearchHitField) HighlightField(org.elasticsearch.search.fetch.subphase.highlight.HighlightField) Histogram(org.elasticsearch.search.aggregations.bucket.histogram.Histogram) TopHits(org.elasticsearch.search.aggregations.metrics.tophits.TopHits) Nested(org.elasticsearch.search.aggregations.bucket.nested.Nested) HighlightField(org.elasticsearch.search.fetch.subphase.highlight.HighlightField) SearchHits(org.elasticsearch.search.SearchHits) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Example 15 with HighlightBuilder

use of org.graylog.shaded.elasticsearch7.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder in project elasticsearch by elastic.

the class TopHitsIT method testFetchFeatures.

public void testFetchFeatures() {
    SearchResponse response = client().prepareSearch("idx").setTypes("type").setQuery(matchQuery("text", "text").queryName("test")).addAggregation(terms("terms").executionHint(randomExecutionHint()).field(TERMS_AGGS_FIELD).subAggregation(topHits("hits").size(1).highlighter(new HighlightBuilder().field("text")).explain(true).storedField("text").fieldDataField("field1").scriptField("script", new Script(ScriptType.INLINE, MockScriptEngine.NAME, "5", Collections.emptyMap())).fetchSource("text", null).version(true))).get();
    assertSearchResponse(response);
    Terms terms = response.getAggregations().get("terms");
    assertThat(terms, notNullValue());
    assertThat(terms.getName(), equalTo("terms"));
    assertThat(terms.getBuckets().size(), equalTo(5));
    for (Terms.Bucket bucket : terms.getBuckets()) {
        TopHits topHits = bucket.getAggregations().get("hits");
        SearchHits hits = topHits.getHits();
        assertThat(hits.getTotalHits(), equalTo(10L));
        assertThat(hits.getHits().length, equalTo(1));
        SearchHit hit = hits.getAt(0);
        HighlightField highlightField = hit.getHighlightFields().get("text");
        assertThat(highlightField.getFragments().length, equalTo(1));
        assertThat(highlightField.getFragments()[0].string(), equalTo("some <em>text</em> to entertain"));
        Explanation explanation = hit.getExplanation();
        assertThat(explanation.toString(), containsString("text:text"));
        long version = hit.getVersion();
        assertThat(version, equalTo(1L));
        assertThat(hit.getMatchedQueries()[0], equalTo("test"));
        SearchHitField field = hit.field("field1");
        assertThat(field.getValue().toString(), equalTo("5"));
        assertThat(hit.getSourceAsMap().get("text").toString(), equalTo("some text to entertain"));
        field = hit.field("script");
        assertThat(field.getValue().toString(), equalTo("5"));
        assertThat(hit.getSourceAsMap().size(), equalTo(1));
        assertThat(hit.getSourceAsMap().get("text").toString(), equalTo("some text to entertain"));
    }
}
Also used : Script(org.elasticsearch.script.Script) TopHits(org.elasticsearch.search.aggregations.metrics.tophits.TopHits) SearchHit(org.elasticsearch.search.SearchHit) Explanation(org.apache.lucene.search.Explanation) Terms(org.elasticsearch.search.aggregations.bucket.terms.Terms) HighlightField(org.elasticsearch.search.fetch.subphase.highlight.HighlightField) SearchHitField(org.elasticsearch.search.SearchHitField) SearchHits(org.elasticsearch.search.SearchHits) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder) SearchResponse(org.elasticsearch.action.search.SearchResponse) ElasticsearchAssertions.assertSearchResponse(org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)

Aggregations

HighlightBuilder (org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)21 SearchResponse (org.elasticsearch.action.search.SearchResponse)15 SearchHits (org.elasticsearch.search.SearchHits)6 ArrayList (java.util.ArrayList)5 Script (org.elasticsearch.script.Script)5 HighlightField (org.elasticsearch.search.fetch.subphase.highlight.HighlightField)5 ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse)5 InnerHitBuilder (org.elasticsearch.index.query.InnerHitBuilder)4 SearchHit (org.elasticsearch.search.SearchHit)4 HashMap (java.util.HashMap)3 Map (java.util.Map)3 BoolQueryBuilder (org.elasticsearch.index.query.BoolQueryBuilder)3 SearchHitField (org.elasticsearch.search.SearchHitField)3 TopHits (org.elasticsearch.search.aggregations.metrics.tophits.TopHits)3 HighlightBuilder (org.graylog.shaded.elasticsearch6.org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)3 IOException (java.io.IOException)2 Date (java.util.Date)2 ServiceResponse (net.yacy.grid.http.ServiceResponse)2 ElasticsearchClient (net.yacy.grid.io.index.ElasticsearchClient)2 Sort (net.yacy.grid.io.index.Sort)2