use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.
the class ESIndex method termFilter.
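/**
* Uses the index as an inverted index to find the documents that contain a term.
* No score is computed.
* @param field field the term is looked up in
* @param term stemmed term
* @return ids of the matching documents
* @throws Exception
*/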
// public List<String> termFilter(String field, String term, String[] ids) throws Exception{
// StopWatch stopWatch=null;
// if(logger.isDebugEnabled()){
// stopWatch = new StopWatch();
// stopWatch.start();
// }
//
// /**
// * setSize() has a huge impact on performance, the smaller the faster
// */
//
// TermFilterBuilder termFilterBuilder = new TermFilterBuilder(field, term);
// IdsFilterBuilder idsFilterBuilder = new IdsFilterBuilder(documentType);
//
//
// idsFilterBuilder.addIds(ids);
//
//
// SearchResponse response = client.prepareSearch(indexName).setSize(ids.length).
// setHighlighterFilter(false).setTrackScores(false).
// setNoFields().setExplain(false).setFetchSource(false).
// setQuery(QueryBuilders.constantScoreQuery(
// FilterBuilders.andFilter(termFilterBuilder,
// idsFilterBuilder))).
// execute().actionGet();
// List<String> list = new ArrayList<>(response.getHits().getHits().length);
// for (SearchHit searchHit : response.getHits()) {
// list.add(searchHit.getId());
// }
// if(logger.isDebugEnabled()){
// logger.debug("time spent on termFilter() for " + term + " = " + stopWatch+
// " There are "+list.size()+" matched docs");
// }
// return list;
// }
/**
 * Collects the ids of the hits for a constant-score search that ANDs a term filter on
 * {@code field}/{@code term} with the filter obtained by wrapping {@code filterQuery}.
 * Score tracking, highlighting, explain, stored fields and source fetching are all
 * switched off on the request.
 *
 * @param field       field passed to the term filter
 * @param term        term passed to the term filter
 * @param filterQuery query source handed to {@code QueryBuilders.wrapperQuery}
 * @param size        value passed to {@code setSize} on the search request
 * @return ids of the hits contained in the response
 * @throws Exception declared by the signature; the visible body raises no checked exception itself
 */
public List<String> termFilter(String field, String term, String filterQuery, int size) throws Exception {
    StopWatch timer = null;
    if (logger.isDebugEnabled()) {
        timer = new StopWatch();
        timer.start();
    }

    // setSize() has a huge impact on performance: the smaller, the faster.
    TermFilterBuilder byTerm = new TermFilterBuilder(field, term);
    SearchResponse searchResponse = client.prepareSearch(indexName)
            .setSize(size)
            .setHighlighterFilter(false)
            .setTrackScores(false)
            .setNoFields()
            .setExplain(false)
            .setFetchSource(false)
            .setQuery(
                    QueryBuilders.constantScoreQuery(
                            FilterBuilders.andFilter(
                                    byTerm,
                                    FilterBuilders.queryFilter(QueryBuilders.wrapperQuery(filterQuery)))))
            .execute()
            .actionGet();

    SearchHit[] hits = searchResponse.getHits().getHits();
    List<String> matchedIds = new ArrayList<>(hits.length);
    for (int i = 0; i < hits.length; i++) {
        matchedIds.add(hits[i].getId());
    }

    if (logger.isDebugEnabled()) {
        logger.debug("time spent on termFilter() for " + term + " = " + timer + " There are " + matchedIds.size() + " matched docs");
    }
    return matchedIds;
}
use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.
the class FeatureLoader method loadNgramFeatureTFIFL.
/**
 * Loads the "term frequency inverse field length" value of one n-gram feature.
 * For every hit returned by {@code index.spanNearFrequency}, the hit score is divided by
 * the value of that document's length field and stored in the data set.
 * The length field is named {@code <field>_field_length}; for the body field it
 * should therefore be called {@code body_field_length}.
 *
 * @param index        index that is searched and from which the length field is read
 * @param dataSet      data set receiving the feature values
 * @param feature      n-gram feature; supplies the feature index and the field name
 * @param idTranslator maps index ids to the integer ids used by the data set
 * @param docFilter    document filter forwarded to the search
 */
private static void loadNgramFeatureTFIFL(ESIndex index, DataSet dataSet, Ngram feature, IdTranslator idTranslator, String docFilter) {
    int column = feature.getIndex();
    SearchHit[] matches = index.spanNearFrequency(feature, docFilter, idTranslator.numData())
            .getHits()
            .getHits();
    String lengthFieldName = feature.getField() + "_" + "field_length";

    for (int i = 0; i < matches.length; i++) {
        SearchHit match = matches[i];
        String docId = match.getId();
        float hitScore = match.getScore();
        float fieldLength = index.getFloatField(docId, lengthFieldName);
        // The division is carried out in float and only then widened to double.
        double normalized = hitScore / fieldLength;
        dataSet.setFeatureValue(idTranslator.toIntId(docId), column, normalized);
    }
}
use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.
the class FeatureLoader method loadNgramFeatureFrequency.
/**
 * Stores, for one n-gram feature, the score of every hit returned by
 * {@code index.spanNearFrequency} as that document's feature value.
 *
 * @param index        index that is searched
 * @param dataSet      data set receiving the feature values
 * @param feature      n-gram feature; supplies the feature index
 * @param idTranslator maps index ids to the integer ids used by the data set
 * @param docFilter    document filter forwarded to the search
 */
private static void loadNgramFeatureFrequency(ESIndex index, DataSet dataSet, Ngram feature, IdTranslator idTranslator, String docFilter) {
    final int column = feature.getIndex();
    final SearchResponse searchResult = index.spanNearFrequency(feature, docFilter, idTranslator.numData());

    for (SearchHit match : searchResult.getHits().getHits()) {
        final String docId = match.getId();
        final float hitScore = match.getScore();
        dataSet.setFeatureValue(idTranslator.toIntId(docId), column, hitScore);
    }
}
use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.
the class FeatureLoader method loadNgramFeatureOriginal.
/**
 * Stores, for one n-gram feature, the score of every hit returned by
 * {@code index.spanNear} as that document's feature value.
 *
 * @param index        index that is searched
 * @param dataSet      data set receiving the feature values
 * @param feature      n-gram feature; supplies the feature index
 * @param idTranslator maps index ids to the integer ids used by the data set
 * @param docFilter    document filter forwarded to the search
 */
private static void loadNgramFeatureOriginal(ESIndex index, DataSet dataSet, Ngram feature, IdTranslator idTranslator, String docFilter) {
    int column = feature.getIndex();
    SearchHit[] matches = index.spanNear(feature, docFilter, idTranslator.numData()).getHits().getHits();

    int position = 0;
    while (position < matches.length) {
        SearchHit match = matches[position];
        String docId = match.getId();
        float hitScore = match.getScore();
        int row = idTranslator.toIntId(docId);
        dataSet.setFeatureValue(row, column, hitScore);
        position++;
    }
}
use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.
the class ESIndexTest method test19.
/**
 * Manual check of {@code ESIndex.minimumShouldMatch}: queries the "body" field of the
 * "ohsumed_20000" index with four terms and the argument 70, then prints the total hit
 * count followed by the id and score of every returned hit.
 *
 * @throws Exception if building the index, running the query or closing the index fails
 */
static void test19() throws Exception {
    ESIndex index = new ESIndex.Builder().setClientType("node").setIndexName("ohsumed_20000").build();
    try {
        List<String> terms = new ArrayList<>();
        terms.add("repeated");
        terms.add("cyclophosphamide");
        terms.add("cycles");
        terms.add("study");

        SearchResponse response = index.minimumShouldMatch(terms, "body", 70);
        System.out.println(response.getHits().getTotalHits());
        for (SearchHit searchHit : response.getHits()) {
            System.out.println(searchHit.getId() + " " + searchHit.getScore());
        }
    } finally {
        // Close the index even when the query or the printing throws, so the
        // client opened by build() is not left running.
        index.close();
    }
}
Aggregations