Search in sources :

Example 1 with ElasticsearchClient

use of net.yacy.grid.io.index.ElasticsearchClient in project yacy_grid_mcp by yacy.

the class GSASearchService method serviceImpl.

/**
 * Answers a search request with a result list rendered in the XML format of the
 * Google Search Appliance (GSA) API.
 *
 * <p>The request parameters are read from {@code call}; the GSA operators
 * {@code daterange}, {@code as_filetype}/{@code as_ft} and {@code as_sitesearch}/{@code as_dt}
 * are translated into query modifiers appended to the query string before it is handed
 * to the index. The XML document is assembled by hand (no XML encoder), so every value
 * that originates from the request or from the index is escaped before being written.
 *
 * @param call     the request parameters ({@code q}, {@code num}/{@code rows}/{@code maximumRecords},
 *                 {@code start}/{@code startRecord}, {@code contentdom}, {@code site}/{@code collection},
 *                 {@code timezoneOffset}, {@code explain}, {@code sort}, and the GSA operators above)
 * @param response the servlet response; not used by this method
 * @return a service response wrapping the GSA XML document as a string
 */
@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    // query Attributes:
    // for original GSA query attributes, see https://www.google.com/support/enterprise/static/gsa/docs/admin/74/gsa_doc_set/xml_reference/request_format.html#1082911
    String q = call.get("q", "");
    // in GSA: the maximum value of this parameter is 1000
    int num = call.get("num", call.get("rows", call.get("maximumRecords", 10)));
    // The index number of the results is 0-based
    int start = call.get("startRecord", call.get("start", 0));
    Classification.ContentDomain contentdom = Classification.ContentDomain.contentdomParser(call.get("contentdom", "all"));
    // important: call arguments may overrule parsed collection values if not empty. This can be used for authentified indexes!
    String site = call.get("site", call.get("collection", "").replace(',', '|'));
    String[] sites = site.length() == 0 ? new String[0] : site.split("\\|");
    int timezoneOffset = call.get("timezoneOffset", 0);
    boolean explain = call.get("explain", false);
    Sort sort = new Sort(call.get("sort", ""));

    // translate the GSA-specific operators into modifiers of the query string
    String translatedQ = q;
    String daterange = call.get("daterange", "");
    if (daterange.length() > 0) {
        translatedQ += " daterange:" + daterange;
    }
    String as_filetype = call.get("as_filetype", "");
    // refers to as_filetype: only 'i' (include) or 'e' (exclude) allowed
    String as_ft = call.get("as_ft", "i");
    if (as_filetype.length() > 0) {
        translatedQ += (as_ft.equals("i") ? " " : " -") + "filetype:" + as_filetype;
    }
    String as_sitesearch = call.get("as_sitesearch", "");
    // refers to as_sitesearch: only 'i' (include) or 'e' (exclude) allowed
    String as_dt = call.get("as_dt", "i");
    if (as_sitesearch.length() > 0) {
        translatedQ += (as_dt.equals("i") ? " " : " -") + "site:" + as_sitesearch;
    }
    // the response echoes the original query, not the translated one
    String queryXML = XML.escape(q);

    // prepare a query
    YaCyQuery yq = new YaCyQuery(translatedQ, sites, contentdom, timezoneOffset);
    ElasticsearchClient ec = Data.gridIndex.getElasticClient();
    HighlightBuilder hb = new HighlightBuilder().field(WebMapping.text_t.getMapping().name()).preTags("").postTags("").fragmentSize(140);
    ElasticsearchClient.Query query = ec.query("web", null, yq.queryBuilder, null, sort, hb, timezoneOffset, start, num, 0, explain);
    List<Map<String, Object>> result = query.results;
    List<String> explanations = query.explanations;

    // no xml encoder here on purpose, we will try to not have such things into our software in the future!
    // the builder is local to this call, so the unsynchronized StringBuilder is sufficient
    StringBuilder sb = new StringBuilder(2048);
    sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n");
    // GSP
    sb.append("<GSP VER=\"3.2\">\n");
    sb.append("<!-- This is a Google Search Appliance API result, provided by YaCy Grid (see: https://github.com/yacy/yacy_grid_mcp). For the GSA protocol, see https://www.google.com/support/enterprise/static/gsa/docs/admin/74/gsa_doc_set/xml_reference/index.html -->\n");
    sb.append("<TM>0</TM>\n");
    sb.append("<Q>").append(queryXML).append("</Q>\n");
    sb.append("<PARAM name=\"output\" value=\"xml_no_dtd\" original_value=\"xml_no_dtd\"/>\n");
    sb.append("<PARAM name=\"ie\" value=\"UTF-8\" original_value=\"UTF-8\"/>\n");
    sb.append("<PARAM name=\"oe\" value=\"UTF-8\" original_value=\"UTF-8\"/>\n");
    sb.append("<PARAM name=\"q\" value=\"").append(queryXML).append("\" original_value=\"").append(queryXML).append("\"/>\n");
    sb.append("<PARAM name=\"start\" value=\"").append(start).append("\" original_value=\"").append(start).append("\"/>\n");
    sb.append("<PARAM name=\"num\" value=\"").append(num).append("\" original_value=\"").append(num).append("\"/>\n");
    String siteXML = XML.escape(site);
    sb.append("<PARAM name=\"site\" value=\"").append(siteXML).append("\" original_value=\"").append(siteXML).append("\"/>\n");
    // RES
    // SN; The index number (1-based) of this search result; EN: Indicates the index (1-based) of the last search result returned in this result set.
    sb.append("<RES SN=\"").append(start + 1).append("\" EN=\"").append(start + result.size()).append("\">\n");
    // this should show the estimated total number of results
    sb.append("<M>").append(query.hitCount).append("</M>\n");
    sb.append("<FI/>\n");
    // sb.append("<NB><NU>").append(getAPIPath()).append("?q=\"").append(queryXML).append("\"&amp;site=&amp;lr=&amp;ie=UTF-8&amp;oe=UTF-8&amp;output=xml_no_dtd&amp;client=&amp;access=&amp;sort=&amp;start=").append(Integer.toString(start)).append("&amp;num=").append(Integer.toString(num)).append("&amp;sa=N</NU></NB>\n");
    // List
    for (int hitc = 0; hitc < result.size(); hitc++) {
        WebDocument doc = new WebDocument(result.get(hitc));
        String titleXML = XML.escape(doc.getTitle());
        String link = doc.getLink();
        if (Classification.ContentDomain.IMAGE == contentdom) {
            link = doc.pickImage(link);
        }
        String linkXML = XML.escape(link);
        String urlhash = Digest.encodeMD5Hex(link);
        String snippet = doc.getSnippet(query.highlights.get(hitc), yq);
        String snippetXML = XML.escape(snippet);
        Date last_modified_date = doc.getDate();
        int size = doc.getSize();
        int sizekb = size / 1024;
        int sizemb = sizekb / 1024;
        String size_string = sizemb > 0 ? (sizemb + " mbyte") : sizekb > 0 ? (sizekb + " kbyte") : (size + " byte");
        // String host = doc.getString(WebMapping.host_s, "");
        // N is the 1-based index of the hit within the whole result set, so it must
        // continue from the requested offset to stay consistent with SN/EN above
        sb.append("<R N=\"").append(start + hitc + 1).append("\" MIME=\"text/html\">\n");
        sb.append("<T>").append(titleXML).append("</T>\n");
        sb.append("<FS NAME=\"date\" VALUE=\"").append(DateParser.formatGSAFS(last_modified_date)).append("\"/>\n");
        sb.append("<CRAWLDATE>").append(DateParser.formatRFC1123(last_modified_date)).append("</CRAWLDATE>\n");
        sb.append("<LANG>en</LANG>\n");
        sb.append("<U>").append(linkXML).append("</U>\n");
        sb.append("<UE>").append(linkXML).append("</UE>\n");
        sb.append("<S>").append(snippetXML).append("</S>\n");
        sb.append("<COLS>dht</COLS>\n");
        sb.append("<HAS><L/><C SZ=\"").append(size_string).append("\" CID=\"").append(urlhash).append("\" ENC=\"UTF-8\"/></HAS>\n");
        // sb.append("<ENT_SOURCE>yacy_v1.921_20170616_9248.tar.gz/amBzuRuUFyt6</ENT_SOURCE>\n");
        if (explain) {
            // a literal "]]>" inside the text would terminate the CDATA section early;
            // split it across two adjacent CDATA sections so the document stays well-formed
            String explanation = String.valueOf(explanations.get(hitc)).replace("]]>", "]]]]><![CDATA[>");
            sb.append("<EXPLANATION><![CDATA[").append(explanation).append("]]></EXPLANATION>\n");
        }
        sb.append("</R>\n");
    }
    // END RES GSP
    sb.append("</RES>\n");
    sb.append("</GSP>\n");
    return new ServiceResponse(sb.toString());
}
Also used : YaCyQuery(net.yacy.grid.io.index.YaCyQuery) Date(java.util.Date) ServiceResponse(net.yacy.grid.http.ServiceResponse) WebDocument(net.yacy.grid.io.index.WebDocument) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) Classification(net.yacy.grid.tools.Classification) Sort(net.yacy.grid.io.index.Sort) ElasticsearchClient(net.yacy.grid.io.index.ElasticsearchClient) Map(java.util.Map) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)

Example 2 with ElasticsearchClient

use of net.yacy.grid.io.index.ElasticsearchClient in project yacy_grid_mcp by yacy.

the class YaCySearchService method serviceImpl.

/**
 * Answers a search request with a JSON (or JSONP) document containing one channel
 * with the result items and the facet navigation.
 *
 * <p>The JSON object holds a {@code channels} array with a single channel. The channel
 * carries the paging metadata ({@code startIndex}, {@code itemsPerPage},
 * {@code totalResults}), the {@code items} array with one entry per hit and the
 * {@code navigation} array with one entry per aggregation returned by the index.
 * If a non-empty {@code callback} parameter is given, the JSON is wrapped as
 * {@code callback([...]);} and returned as a string.
 *
 * @param call     the request parameters ({@code query}, {@code callback}, {@code minified},
 *                 {@code explain}, {@code contentdom}, {@code collection},
 *                 {@code maximumRecords}/{@code rows}/{@code num}, {@code startRecord}/{@code start},
 *                 {@code timezoneOffset}, {@code facetLimit}, {@code facetFields}, {@code sort})
 * @param response the servlet response; not used by this method
 * @return a service response wrapping either the JSON object or the JSONP string
 * @throws IllegalArgumentException if {@code facetFields} names a field that is not a
 *         {@code WebMapping} constant
 */
@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    String callback = call.get("callback", "");
    boolean jsonp = callback != null && callback.length() > 0;
    boolean minified = call.get("minified", false);
    boolean explain = call.get("explain", false);
    String q = call.get("query", "");
    Classification.ContentDomain contentdom = Classification.ContentDomain.contentdomParser(call.get("contentdom", "all"));
    // important: call arguments may overrule parsed collection values if not empty. This can be used for authentified indexes!
    String collection = call.get("collection", "");
    // to be compatible with the site-operator of GSA, we use a vertical pipe symbol here to divide collections.
    collection = collection.replace(',', '|');
    String[] collections = collection.length() == 0 ? new String[0] : collection.split("\\|");
    int maximumRecords = call.get("maximumRecords", call.get("rows", call.get("num", 10)));
    int startRecord = call.get("startRecord", call.get("start", 0));
    // int meanCount = call.get("meanCount", 5);
    int timezoneOffset = call.get("timezoneOffset", -1);
    // String nav = call.get("nav", "");
    // String prefermaskfilter = call.get("prefermaskfilter", "");
    // String constraint = call.get("constraint", "");
    int facetLimit = call.get("facetLimit", 10);
    String facetFields = call.get("facetFields", YaCyQuery.FACET_DEFAULT_PARAMETER);
    List<WebMapping> facetFieldMapping = new ArrayList<>();
    for (String facetField : facetFields.split(",")) {
        // tolerate "a, b" and an empty parameter: valueOf would throw on "" or " b"
        String facetFieldName = facetField.trim();
        if (facetFieldName.isEmpty()) {
            continue;
        }
        facetFieldMapping.add(WebMapping.valueOf(facetFieldName));
    }
    Sort sort = new Sort(call.get("sort", ""));

    // run the query
    YaCyQuery yq = new YaCyQuery(q, collections, contentdom, timezoneOffset);
    ElasticsearchClient ec = Data.gridIndex.getElasticClient();
    HighlightBuilder hb = new HighlightBuilder().field(WebMapping.text_t.getMapping().name()).preTags("").postTags("").fragmentSize(140);
    ElasticsearchClient.Query query = ec.query("web", null, yq.queryBuilder, null, sort, hb, timezoneOffset, startRecord, maximumRecords, facetLimit, explain, facetFieldMapping.toArray(new WebMapping[0]));
    List<Map<String, Object>> result = query.results;
    List<String> explanations = query.explanations;

    // channel header
    JSONObject json = new JSONObject(true);
    JSONArray channels = new JSONArray();
    json.put("channels", channels);
    JSONObject channel = new JSONObject(true);
    channels.put(channel);
    JSONArray items = new JSONArray();
    channel.put("title", "Search for " + q);
    channel.put("description", "Search for " + q);
    channel.put("startIndex", "" + startRecord);
    // the items array is still empty at this point, so the count must be taken from
    // the result list; the key is written here to keep the position of the attribute
    channel.put("itemsPerPage", "" + result.size());
    channel.put("searchTerms", q);
    channel.put("totalResults", Integer.toString(query.hitCount));
    channel.put("items", items);

    // one item per hit
    for (int hitc = 0; hitc < result.size(); hitc++) {
        WebDocument doc = new WebDocument(result.get(hitc));
        JSONObject hit = new JSONObject(true);
        String titleString = doc.getTitle();
        String link = doc.getLink();
        if (Classification.ContentDomain.IMAGE == contentdom) {
            // the url before we extract the link
            hit.put("url", link);
            link = doc.pickImage(link);
            hit.put("icon", link);
            hit.put("image", link);
        }
        String snippet = doc.getSnippet(query.highlights.get(hitc), yq);
        Date last_modified_date = doc.getDate();
        int size = doc.getSize();
        int sizekb = size / 1024;
        int sizemb = sizekb / 1024;
        String size_string = sizemb > 0 ? (sizemb + " mbyte") : sizekb > 0 ? (sizekb + " kbyte") : (size + " byte");
        String host = doc.getHost();
        hit.put("title", titleString);
        hit.put("link", link);
        hit.put("description", snippet);
        hit.put("pubDate", DateParser.formatRFC1123(last_modified_date));
        hit.put("size", Integer.toString(size));
        hit.put("sizename", size_string);
        hit.put("host", host);
        if (explain) {
            hit.put("explanation", explanations.get(hitc));
        }
        items.put(hit);
    }

    // one navigation entry per aggregation
    JSONArray navigation = new JSONArray();
    channel.put("navigation", navigation);
    Map<String, List<Map.Entry<String, Long>>> aggregations = query.aggregations;
    for (Map.Entry<String, List<Map.Entry<String, Long>>> fe : aggregations.entrySet()) {
        String facetname = fe.getKey();
        WebMapping mapping = WebMapping.valueOf(facetname);
        JSONObject facetobject = new JSONObject(true);
        facetobject.put("facetname", mapping.getMapping().getFacetname());
        facetobject.put("displayname", mapping.getMapping().getDisplayname());
        facetobject.put("type", mapping.getMapping().getFacettype());
        facetobject.put("min", "0");
        facetobject.put("max", "0");
        facetobject.put("mean", "0");
        facetobject.put("count", fe.getValue().size());
        JSONArray elements = new JSONArray();
        facetobject.put("elements", elements);
        for (Map.Entry<String, Long> element : fe.getValue()) {
            JSONObject elementEntry = new JSONObject(true);
            elementEntry.put("name", element.getKey());
            elementEntry.put("count", element.getValue().toString());
            elementEntry.put("modifier", mapping.getMapping().getFacetmodifier() + ":" + element.getKey());
            elements.put(elementEntry);
        }
        navigation.put(facetobject);
    }

    if (jsonp) {
        // NOTE(review): the callback name is taken verbatim from the request and written
        // in front of the JSON; consider restricting it to a JavaScript identifier
        // pattern to avoid script injection through the callback parameter.
        StringBuilder sb = new StringBuilder(1024);
        sb.append(callback).append("([").append(json.toString(minified ? 0 : 2)).append("]);");
        return new ServiceResponse(sb.toString());
    } else {
        return new ServiceResponse(json);
    }
}
Also used : ArrayList(java.util.ArrayList) ServiceResponse(net.yacy.grid.http.ServiceResponse) Classification(net.yacy.grid.tools.Classification) Sort(net.yacy.grid.io.index.Sort) ArrayList(java.util.ArrayList) List(java.util.List) ElasticsearchClient(net.yacy.grid.io.index.ElasticsearchClient) JSONArray(org.json.JSONArray) YaCyQuery(net.yacy.grid.io.index.YaCyQuery) Date(java.util.Date) WebDocument(net.yacy.grid.io.index.WebDocument) JSONObject(org.json.JSONObject) WebMapping(net.yacy.grid.io.index.WebMapping) Map(java.util.Map) HighlightBuilder(org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)

Aggregations

Date (java.util.Date)2 Map (java.util.Map)2 ServiceResponse (net.yacy.grid.http.ServiceResponse)2 ElasticsearchClient (net.yacy.grid.io.index.ElasticsearchClient)2 Sort (net.yacy.grid.io.index.Sort)2 WebDocument (net.yacy.grid.io.index.WebDocument)2 YaCyQuery (net.yacy.grid.io.index.YaCyQuery)2 Classification (net.yacy.grid.tools.Classification)2 HighlightBuilder (org.elasticsearch.search.fetch.subphase.highlight.HighlightBuilder)2 ArrayList (java.util.ArrayList)1 List (java.util.List)1 AtomicInteger (java.util.concurrent.atomic.AtomicInteger)1 WebMapping (net.yacy.grid.io.index.WebMapping)1 JSONArray (org.json.JSONArray)1 JSONObject (org.json.JSONObject)1