use of org.apache.sdap.mudrod.ssearch.structure.SResult in project incubator-sdap-mudrod by apache.
the class Ranker method getVariance.
/**
 * Computes the variance of a single attribute over a list of results.
 *
 * <p>The mean comes from {@code getMean}; the squared deviations from that mean are summed,
 * divided by the number of results, and the quotient is passed through {@code getNDForm}.
 *
 * @param attribute  name of the attribute to read from every result
 * @param resultList results whose attribute values are evaluated
 * @return the value {@code getNDForm} yields for the average squared deviation
 */
private double getVariance(String attribute, List<SResult> resultList) {
  final double average = getMean(attribute, resultList);
  double squaredDeviationSum = 0.0;
  for (SResult result : resultList) {
    // The attribute value is cast to Double, exactly as stored on the result.
    double value = (Double) SResult.get(result, attribute);
    double deviation = average - value;
    squaredDeviationSum += deviation * deviation;
  }
  return getNDForm(squaredDeviationSum / resultList.size());
}
use of org.apache.sdap.mudrod.ssearch.structure.SResult in project incubator-sdap-mudrod by apache.
the class Searcher method searchByQuery.
/**
 * Main method of semantic search.
 *
 * <p>Builds a semantic query through {@code Dispatcher.createSemQuery}, runs it against the
 * given index/type sorted by the field selected via {@code rankOption}, and converts each of
 * the (at most 500) hits into an {@link SResult}. For every hit an additional lookup against
 * the {@code trainingranking} type is made to attach a label, if one is stored.
 *
 * @param index         index name in Elasticsearch
 * @param type          type name in Elasticsearch
 * @param query         regular query string
 * @param queryOperator query mode- query, or, and
 * @param rankOption    a keyword used to determine the Elasticsearch sort field and
 *                      SortOrder; unrecognized keywords sort ascending by short name
 * @return a list of search results; an empty list when the index does not exist
 */
@SuppressWarnings("unchecked")
public List<SResult> searchByQuery(String index, String type, String query, String queryOperator, String rankOption) {
  boolean exists = es.getClient().admin().indices().prepareExists(index).execute().actionGet().isExists();
  if (!exists) {
    return new ArrayList<>();
  }

  // Map the ranking keyword onto the Elasticsearch field and direction to sort by.
  SortOrder order;
  String sortField;
  switch (rankOption) {
    case "Rank-AllTimePopularity":
      sortField = "Dataset-AllTimePopularity";
      order = SortOrder.DESC;
      break;
    case "Rank-MonthlyPopularity":
      sortField = "Dataset-MonthlyPopularity";
      order = SortOrder.DESC;
      break;
    case "Rank-UserPopularity":
      sortField = "Dataset-UserPopularity";
      order = SortOrder.DESC;
      break;
    case "Rank-LongName-Full":
      sortField = "Dataset-LongName.raw";
      order = SortOrder.ASC;
      break;
    case "Rank-ShortName-Full":
      sortField = "Dataset-ShortName.raw";
      order = SortOrder.ASC;
      break;
    case "Rank-GridSpatialResolution":
      sortField = "Dataset-GridSpatialResolution";
      order = SortOrder.DESC;
      break;
    case "Rank-SatelliteSpatialResolution":
      sortField = "Dataset-SatelliteSpatialResolution";
      order = SortOrder.DESC;
      break;
    case "Rank-StartTimeLong-Long":
      sortField = "DatasetCoverage-StartTimeLong-Long";
      order = SortOrder.ASC;
      break;
    case "Rank-StopTimeLong-Long":
      sortField = "DatasetCoverage-StopTimeLong-Long";
      order = SortOrder.DESC;
      break;
    default:
      sortField = "Dataset-ShortName.raw";
      order = SortOrder.ASC;
      break;
  }

  Dispatcher dp = new Dispatcher(this.getConfig(), this.getES(), null);
  BoolQueryBuilder qb = dp.createSemQuery(query, 1.0, queryOperator);
  List<SResult> resultList = new ArrayList<>();
  SearchRequestBuilder builder = es.getClient().prepareSearch(index).setTypes(type).setQuery(qb).addSort(sortField, order).setSize(500).setTrackScores(true);
  SearchResponse response = builder.execute().actionGet();

  // One formatter serves every hit; it is a local, so it is never shared between threads.
  SimpleDateFormat dateFormat = new SimpleDateFormat("MM/dd/yyyy");

  for (SearchHit hit : response.getHits().getHits()) {
    Map<String, Object> result = hit.getSource();
    Double relevance = Double.valueOf(NDForm.format(hit.getScore()));
    String shortName = (String) result.get("Dataset-ShortName");
    String longName = (String) result.get("Dataset-LongName");

    List<String> topicList = (List<String>) result.get("DatasetParameter-Variable");
    String topic = "";
    if (null != topicList) {
      topic = String.join(", ", topicList);
    }

    String content = (String) result.get("Dataset-Description");
    if (content != null && !content.isEmpty()) {
      // Trim BEFORE measuring: measuring the untrimmed text and then cutting the trimmed
      // text could request more characters than remain and throw
      // StringIndexOutOfBoundsException for whitespace-padded descriptions.
      String trimmed = content.trim();
      int maxLength = (trimmed.length() < MAX_CHAR) ? trimmed.length() : MAX_CHAR;
      // Existing output format is kept: (maxLength - 1) characters followed by "...".
      content = trimmed.substring(0, Math.max(maxLength - 1, 0)) + "...";
    }

    List<String> longdate = (List<String>) result.get("DatasetCitation-ReleaseDateLong");
    Long releaseMillis = Long.valueOf(longdate.get(0));
    String dateText = dateFormat.format(new Date(releaseMillis));

    // start date
    Long start = (Long) result.get("DatasetCoverage-StartTimeLong-Long");
    String startDateTxt = dateFormat.format(new Date(start));

    // end date: a missing or empty stop time is reported as "Present"
    String end = (String) result.get("Dataset-DatasetCoverage-StopTimeLong");
    String endDateTxt;
    if (end == null || end.isEmpty()) {
      endDateTxt = "Present";
    } else {
      endDateTxt = dateFormat.format(new Date(Long.valueOf(end)));
    }

    String processingLevel = (String) result.get("Dataset-ProcessingLevel");
    Double proNum = getProLevelNum(processingLevel);
    Double userPop = getPop(((Integer) result.get("Dataset-UserPopularity")).doubleValue());
    Double allPop = getPop(((Integer) result.get("Dataset-AllTimePopularity")).doubleValue());
    Double monthPop = getPop(((Integer) result.get("Dataset-MonthlyPopularity")).doubleValue());

    // Guarded like the topic list above: a hit without sensors yields an empty string
    // instead of a NullPointerException from String.join.
    List<String> sensors = (List<String>) result.get("DatasetSource-Sensor-ShortName");
    String sensorText = (sensors == null) ? "" : String.join(", ", sensors);

    SResult re = new SResult(shortName, longName, topic, content, dateText);
    SResult.set(re, "term", relevance);
    SResult.set(re, "releaseDate", releaseMillis.doubleValue());
    SResult.set(re, "processingLevel", processingLevel);
    SResult.set(re, "processingL", proNum);
    SResult.set(re, "userPop", userPop);
    SResult.set(re, "allPop", allPop);
    SResult.set(re, "monthPop", monthPop);
    SResult.set(re, "startDate", startDateTxt);
    SResult.set(re, "endDate", endDateTxt);
    SResult.set(re, "sensors", sensorText);

    // Look up a stored label for this (query, dataset) pair; when several hits come back
    // the label of the last one wins, and it stays null when there are none.
    QueryBuilder queryLabelSearch = QueryBuilders.boolQuery().must(QueryBuilders.termQuery("query", query)).must(QueryBuilders.termQuery("dataID", shortName));
    SearchResponse labelRes = es.getClient().prepareSearch(index).setTypes("trainingranking").setQuery(queryLabelSearch).setSize(5).execute().actionGet();
    String labelString = null;
    for (SearchHit label : labelRes.getHits().getHits()) {
      Map<String, Object> labelItem = label.getSource();
      labelString = (String) labelItem.get("label");
    }
    SResult.set(re, "label", labelString);

    resultList.add(re);
  }
  return resultList;
}
use of org.apache.sdap.mudrod.ssearch.structure.SResult in project incubator-sdap-mudrod by apache.
the class Searcher method ssearch.
/**
 * Runs a semantic search and renders the hits as a JSON string.
 *
 * <p>The hits come from {@code searchByQuery}; when {@code rankOption} equals
 * {@code "Rank-SVM"} they are additionally re-ordered by the supplied ranker. The returned
 * JSON is an object with a single {@code "PDResults"} member holding one entry per hit.
 *
 * @param index         index name in Elasticsearch
 * @param type          type name in Elasticsearch
 * @param query         regular query string
 * @param queryOperator query mode- query, or, and
 * @param rankOption    a keyword used to determine the Elasticsearch SortOrder
 * @param rr            selected ranking method, applied only for {@code "Rank-SVM"}
 * @return search results serialized as a JSON string
 */
public String ssearch(String index, String type, String query, String queryOperator, String rankOption, Ranker rr) {
  List<SResult> hits = searchByQuery(index, type, query, queryOperator, rankOption);
  if ("Rank-SVM".equals(rankOption)) {
    hits = rr.rank(hits);
  }

  List<JsonObject> entries = new ArrayList<>();
  for (SResult hit : hits) {
    JsonObject entry = new JsonObject();
    entry.addProperty("Short Name", (String) SResult.get(hit, "shortName"));
    entry.addProperty("Long Name", (String) SResult.get(hit, "longName"));
    entry.addProperty("Topic", (String) SResult.get(hit, "topic"));
    entry.addProperty("Description", (String) SResult.get(hit, "description"));
    entry.addProperty("Release Date", (String) SResult.get(hit, "relase_date"));

    String startDate = (String) SResult.get(hit, "startDate");
    String endDate = (String) SResult.get(hit, "endDate");
    entry.addProperty("Start/End Date", startDate + " - " + endDate);

    entry.addProperty("Processing Level", (String) SResult.get(hit, "processingLevel"));
    entry.addProperty("Sensor", (String) SResult.get(hit, "sensors"));
    entries.add(entry);
  }

  // Wrap the per-hit entries under the "PDResults" key of the response object.
  JsonElement entriesElement = new Gson().toJsonTree(entries);
  JsonObject wrapper = new JsonObject();
  wrapper.add("PDResults", entriesElement);
  return wrapper.toString();
}
Aggregations