Use of net.yacy.grid.io.index.ElasticsearchClient in project yacy_grid_mcp by yacy.
The class GSASearchService, method serviceImpl.
/**
 * Runs a web search against the "web" index and renders the hits as a
 * Google Search Appliance (GSA) style XML document.
 *
 * <p>Attributes read from {@code call} (first present name wins, otherwise the default applies):
 * <ul>
 *   <li>{@code q} - the query string (default: empty)</li>
 *   <li>{@code num} / {@code rows} / {@code maximumRecords} - number of requested hits (default: 10)</li>
 *   <li>{@code startRecord} / {@code start} - 0-based offset of the first hit (default: 0)</li>
 *   <li>{@code contentdom} - content domain (default: "all")</li>
 *   <li>{@code site} / {@code collection} - collections, separated by '|' (a ',' in
 *       {@code collection} is turned into '|')</li>
 *   <li>{@code timezoneOffset} (default: 0), {@code explain} (default: false), {@code sort}</li>
 *   <li>{@code daterange}, {@code as_filetype} + {@code as_ft}, {@code as_sitesearch} + {@code as_dt} -
 *       appended to the query as {@code daterange:}, {@code filetype:} and {@code site:} modifiers</li>
 * </ul>
 *
 * @param call     the request attributes
 * @param response the servlet response; not used by this method
 * @return a {@code ServiceResponse} wrapping the generated XML text
 */
@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    // query Attributes:
    // for original GSA query attributes, see https://www.google.com/support/enterprise/static/gsa/docs/admin/74/gsa_doc_set/xml_reference/request_format.html#1082911
    String q = call.get("q", "");
    // in GSA: the maximum value of this parameter is 1000
    int num = call.get("num", call.get("rows", call.get("maximumRecords", 10)));
    // The index number of the results is 0-based
    int start = call.get("startRecord", call.get("start", 0));
    Classification.ContentDomain contentdom = Classification.ContentDomain.contentdomParser(call.get("contentdom", "all"));
    // important: call arguments may overrule parsed collection values if not empty. This can be used for authentified indexes!
    String site = call.get("site", call.get("collection", "").replace(',', '|'));
    String[] sites = site.length() == 0 ? new String[0] : site.split("\\|");
    int timezoneOffset = call.get("timezoneOffset", 0);
    boolean explain = call.get("explain", false);
    Sort sort = new Sort(call.get("sort", ""));

    // translate the GSA-specific attributes into query modifiers which are appended to the query string
    String translatedQ = q;
    String daterange = call.get("daterange", "");
    if (daterange.length() > 0) {
        translatedQ += " daterange:" + daterange;
    }
    String as_filetype = call.get("as_filetype", "");
    // refers to as_filetype: only 'i' (include) or 'e' (exclude) allowed
    String as_ft = call.get("as_ft", "i");
    if (as_filetype.length() > 0) {
        translatedQ += (as_ft.equals("i") ? " " : " -") + "filetype:" + as_filetype;
    }
    String as_sitesearch = call.get("as_sitesearch", "");
    // refers to as_sitesearch: only 'i' (include) or 'e' (exclude) allowed
    String as_dt = call.get("as_dt", "i");
    if (as_sitesearch.length() > 0) {
        translatedQ += (as_dt.equals("i") ? " " : " -") + "site:" + as_sitesearch;
    }
    // the response echoes the original query, not the translated one
    String queryXML = XML.escape(q);

    // prepare a query
    YaCyQuery yq = new YaCyQuery(translatedQ, sites, contentdom, timezoneOffset);
    ElasticsearchClient ec = Data.gridIndex.getElasticClient();
    HighlightBuilder hb = new HighlightBuilder().field(WebMapping.text_t.getMapping().name()).preTags("").postTags("").fragmentSize(140);
    ElasticsearchClient.Query query = ec.query("web", null, yq.queryBuilder, null, sort, hb, timezoneOffset, start, num, 0, explain);
    List<Map<String, Object>> result = query.results;
    List<String> explanations = query.explanations;

    // no xml encoder here on purpose, we will try to not have such things into our software in the future!
    // The buffer is local to this call, so an unsynchronized StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder(2048);
    sb.append("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n");

    // GSP
    sb.append("<GSP VER=\"3.2\">\n");
    sb.append("<!-- This is a Google Search Appliance API result, provided by YaCy Grid (see: https://github.com/yacy/yacy_grid_mcp). For the GSA protocol, see https://www.google.com/support/enterprise/static/gsa/docs/admin/74/gsa_doc_set/xml_reference/index.html -->\n");
    sb.append("<TM>0</TM>\n");
    sb.append("<Q>").append(queryXML).append("</Q>\n");
    sb.append("<PARAM name=\"output\" value=\"xml_no_dtd\" original_value=\"xml_no_dtd\"/>\n");
    sb.append("<PARAM name=\"ie\" value=\"UTF-8\" original_value=\"UTF-8\"/>\n");
    sb.append("<PARAM name=\"oe\" value=\"UTF-8\" original_value=\"UTF-8\"/>\n");
    sb.append("<PARAM name=\"q\" value=\"").append(queryXML).append("\" original_value=\"").append(queryXML).append("\"/>\n");
    sb.append("<PARAM name=\"start\" value=\"").append(start).append("\" original_value=\"").append(start).append("\"/>\n");
    sb.append("<PARAM name=\"num\" value=\"").append(num).append("\" original_value=\"").append(num).append("\"/>\n");
    String siteXML = XML.escape(site);
    sb.append("<PARAM name=\"site\" value=\"").append(siteXML).append("\" original_value=\"").append(siteXML).append("\"/>\n");

    // RES
    // SN; The index number (1-based) of this search result; EN: Indicates the index (1-based) of the last search result returned in this result set.
    sb.append("<RES SN=\"").append(start + 1).append("\" EN=\"").append(start + result.size()).append("\">\n");
    // this should show the estimated total number of results
    sb.append("<M>").append(query.hitCount).append("</M>\n");
    sb.append("<FI/>\n");
    // The GSA <NB> navigation element is currently not emitted.

    // List
    for (int hitc = 0; hitc < result.size(); hitc++) {
        WebDocument doc = new WebDocument(result.get(hitc));
        String titleXML = XML.escape(doc.getTitle());
        String link = doc.getLink();
        if (Classification.ContentDomain.IMAGE == contentdom) {
            // for image searches the result link is replaced by the image picked from the document
            link = doc.pickImage(link);
        }
        String linkXML = XML.escape(link);
        String urlhash = Digest.encodeMD5Hex(link);
        String snippetXML = XML.escape(doc.getSnippet(query.highlights.get(hitc), yq));
        Date last_modified_date = doc.getDate();

        // human readable size: the largest unit (mbyte, kbyte, byte) with a non-zero integer value
        int size = doc.getSize();
        int sizekb = size / 1024;
        int sizemb = sizekb / 1024;
        String size_string;
        if (sizemb > 0) {
            size_string = sizemb + " mbyte";
        } else if (sizekb > 0) {
            size_string = sizekb + " kbyte";
        } else {
            size_string = size + " byte";
        }

        // N is the 1-based index of this hit within the whole result set, so it must include the
        // start offset; the first hit of a page then carries the same number as SN of the RES element.
        sb.append("<R N=\"").append(start + hitc + 1).append("\" MIME=\"text/html\">\n");
        sb.append("<T>").append(titleXML).append("</T>\n");
        sb.append("<FS NAME=\"date\" VALUE=\"").append(DateParser.formatGSAFS(last_modified_date)).append("\"/>\n");
        sb.append("<CRAWLDATE>").append(DateParser.formatRFC1123(last_modified_date)).append("</CRAWLDATE>\n");
        sb.append("<LANG>en</LANG>\n");
        sb.append("<U>").append(linkXML).append("</U>\n");
        sb.append("<UE>").append(linkXML).append("</UE>\n");
        sb.append("<S>").append(snippetXML).append("</S>\n");
        sb.append("<COLS>dht</COLS>\n");
        sb.append("<HAS><L/><C SZ=\"").append(size_string).append("\" CID=\"").append(urlhash).append("\" ENC=\"UTF-8\"/></HAS>\n");
        if (explain) {
            // A literal "]]>" inside the text would terminate the CDATA section early and break the
            // document; split such a sequence over two adjacent CDATA sections instead.
            String explanation = String.valueOf(explanations.get(hitc)).replace("]]>", "]]]]><![CDATA[>");
            sb.append("<EXPLANATION><![CDATA[").append(explanation).append("]]></EXPLANATION>\n");
        }
        sb.append("</R>\n");
    }

    // END RES GSP
    sb.append("</RES>\n");
    sb.append("</GSP>\n");
    return new ServiceResponse(sb.toString());
}
Use of net.yacy.grid.io.index.ElasticsearchClient in project yacy_grid_mcp by yacy.
The class YaCySearchService, method serviceImpl.
/**
 * Runs a web search against the "web" index and renders the hits, together with the
 * facet navigation, as a JSON object (optionally wrapped into a JSONP callback).
 *
 * <p>Attributes read from {@code call} (first present name wins, otherwise the default applies):
 * <ul>
 *   <li>{@code query} - the query string (default: empty)</li>
 *   <li>{@code callback} - if non-empty, the result is returned as {@code callback([json]);}</li>
 *   <li>{@code minified} - if true the JSONP payload is serialized without indentation (default: false)</li>
 *   <li>{@code explain} - if true an "explanation" entry is added to every hit (default: false)</li>
 *   <li>{@code contentdom} - content domain (default: "all")</li>
 *   <li>{@code collection} - collections, separated by ',' or '|'</li>
 *   <li>{@code maximumRecords} / {@code rows} / {@code num} - number of requested hits (default: 10)</li>
 *   <li>{@code startRecord} / {@code start} - offset of the first hit (default: 0)</li>
 *   <li>{@code timezoneOffset} (default: -1), {@code facetLimit} (default: 10), {@code sort}</li>
 *   <li>{@code facetFields} - comma-separated {@code WebMapping} names
 *       (default: {@code YaCyQuery.FACET_DEFAULT_PARAMETER}); empty entries are ignored</li>
 * </ul>
 *
 * @param call     the request attributes
 * @param response the servlet response; not used by this method
 * @return a {@code ServiceResponse} wrapping either the JSON object or the JSONP text
 * @throws IllegalArgumentException if {@code facetFields} contains a non-empty name that is
 *         not a {@code WebMapping} constant (raised by {@code WebMapping.valueOf})
 */
@Override
public ServiceResponse serviceImpl(Query call, HttpServletResponse response) {
    String callback = call.get("callback", "");
    boolean jsonp = callback != null && callback.length() > 0;
    boolean minified = call.get("minified", false);
    boolean explain = call.get("explain", false);
    String q = call.get("query", "");
    Classification.ContentDomain contentdom = Classification.ContentDomain.contentdomParser(call.get("contentdom", "all"));
    // important: call arguments may overrule parsed collection values if not empty. This can be used for authentified indexes!
    String collection = call.get("collection", "");
    // to be compatible with the site-operator of GSA, we use a vertical pipe symbol here to divide collections.
    collection = collection.replace(',', '|');
    String[] collections = collection.length() == 0 ? new String[0] : collection.split("\\|");
    int maximumRecords = call.get("maximumRecords", call.get("rows", call.get("num", 10)));
    int startRecord = call.get("startRecord", call.get("start", 0));
    int timezoneOffset = call.get("timezoneOffset", -1);
    int facetLimit = call.get("facetLimit", 10);
    String facetFields = call.get("facetFields", YaCyQuery.FACET_DEFAULT_PARAMETER);

    // Resolve the requested facet names. Tokens are trimmed and empty tokens are skipped so that
    // an empty facetFields value (or a stray comma) requests no facet instead of failing in valueOf.
    List<WebMapping> facetFieldMapping = new ArrayList<>();
    for (String facetField : facetFields.split(",")) {
        String facetName = facetField.trim();
        if (facetName.isEmpty()) {
            continue;
        }
        facetFieldMapping.add(WebMapping.valueOf(facetName));
    }
    Sort sort = new Sort(call.get("sort", ""));

    // run the search
    YaCyQuery yq = new YaCyQuery(q, collections, contentdom, timezoneOffset);
    ElasticsearchClient ec = Data.gridIndex.getElasticClient();
    HighlightBuilder hb = new HighlightBuilder().field(WebMapping.text_t.getMapping().name()).preTags("").postTags("").fragmentSize(140);
    ElasticsearchClient.Query query = ec.query("web", null, yq.queryBuilder, null, sort, hb, timezoneOffset, startRecord, maximumRecords, facetLimit, explain, facetFieldMapping.toArray(new WebMapping[facetFieldMapping.size()]));
    List<Map<String, Object>> result = query.results;
    List<String> explanations = query.explanations;

    // response skeleton: { "channels": [ { ...header..., "items": [...], "navigation": [...] } ] }
    JSONObject json = new JSONObject(true);
    JSONArray channels = new JSONArray();
    json.put("channels", channels);
    JSONObject channel = new JSONObject(true);
    channels.put(channel);
    JSONArray items = new JSONArray();
    channel.put("title", "Search for " + q);
    channel.put("description", "Search for " + q);
    channel.put("startIndex", "" + startRecord);
    // The items array is still empty at this point, so its length cannot be used here;
    // every entry of the result list becomes exactly one item below.
    channel.put("itemsPerPage", "" + result.size());
    channel.put("searchTerms", q);
    channel.put("totalResults", Integer.toString(query.hitCount));
    channel.put("items", items);

    // one item per hit
    for (int hitc = 0; hitc < result.size(); hitc++) {
        WebDocument doc = new WebDocument(result.get(hitc));
        JSONObject hit = new JSONObject(true);
        String titleString = doc.getTitle();
        String link = doc.getLink();
        if (Classification.ContentDomain.IMAGE == contentdom) {
            // the url before we extract the link
            hit.put("url", link);
            link = doc.pickImage(link);
            hit.put("icon", link);
            hit.put("image", link);
        }
        String snippet = doc.getSnippet(query.highlights.get(hitc), yq);
        Date last_modified_date = doc.getDate();

        // human readable size: the largest unit (mbyte, kbyte, byte) with a non-zero integer value
        int size = doc.getSize();
        int sizekb = size / 1024;
        int sizemb = sizekb / 1024;
        String size_string;
        if (sizemb > 0) {
            size_string = sizemb + " mbyte";
        } else if (sizekb > 0) {
            size_string = sizekb + " kbyte";
        } else {
            size_string = size + " byte";
        }
        String host = doc.getHost();

        hit.put("title", titleString);
        hit.put("link", link);
        hit.put("description", snippet);
        hit.put("pubDate", DateParser.formatRFC1123(last_modified_date));
        hit.put("size", Integer.toString(size));
        hit.put("sizename", size_string);
        hit.put("host", host);
        if (explain) {
            hit.put("explanation", explanations.get(hitc));
        }
        items.put(hit);
    }

    // facet navigation: one object per aggregation returned by the index
    JSONArray navigation = new JSONArray();
    channel.put("navigation", navigation);
    Map<String, List<Map.Entry<String, Long>>> aggregations = query.aggregations;
    for (Map.Entry<String, List<Map.Entry<String, Long>>> fe : aggregations.entrySet()) {
        String facetname = fe.getKey();
        WebMapping mapping = WebMapping.valueOf(facetname);
        JSONObject facetobject = new JSONObject(true);
        facetobject.put("facetname", mapping.getMapping().getFacetname());
        facetobject.put("displayname", mapping.getMapping().getDisplayname());
        facetobject.put("type", mapping.getMapping().getFacettype());
        // min, max and mean are emitted as constant placeholders
        facetobject.put("min", "0");
        facetobject.put("max", "0");
        facetobject.put("mean", "0");
        facetobject.put("count", fe.getValue().size());
        JSONArray elements = new JSONArray();
        facetobject.put("elements", elements);
        for (Map.Entry<String, Long> element : fe.getValue()) {
            JSONObject elementEntry = new JSONObject(true);
            elementEntry.put("name", element.getKey());
            elementEntry.put("count", element.getValue().toString());
            elementEntry.put("modifier", mapping.getMapping().getFacetmodifier() + ":" + element.getKey());
            elements.put(elementEntry);
        }
        navigation.put(facetobject);
    }

    if (jsonp) {
        // NOTE(review): the callback name is taken from the request and echoed unvalidated into
        // the response body - consider restricting it to a JavaScript identifier pattern.
        StringBuilder sb = new StringBuilder(1024);
        sb.append(callback).append("([").append(json.toString(minified ? 0 : 2)).append("]);");
        return new ServiceResponse(sb.toString());
    } else {
        return new ServiceResponse(json);
    }
}
Aggregations