Search in sources :

Example 71 with SearchHit

use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.

the class ESIndex method termFilter.

/**
     * use as an inverted index
     * no score is computed
     * @param term stemmed term
     * @return
     * @throws Exception
     */
//    public List<String> termFilter(String field, String term, String[] ids) throws Exception{
//        StopWatch stopWatch=null;
//        if(logger.isDebugEnabled()){
//            stopWatch = new StopWatch();
//            stopWatch.start();
//        }
//
//        /**
//         * setSize() has a huge impact on performance, the smaller the faster
//         */
//
//        TermFilterBuilder termFilterBuilder = new TermFilterBuilder(field, term);
//        IdsFilterBuilder idsFilterBuilder = new IdsFilterBuilder(documentType);
//
//
//        idsFilterBuilder.addIds(ids);
//
//
//        SearchResponse response = client.prepareSearch(indexName).setSize(ids.length).
//                setHighlighterFilter(false).setTrackScores(false).
//                setNoFields().setExplain(false).setFetchSource(false).
//                setQuery(QueryBuilders.constantScoreQuery(
//                        FilterBuilders.andFilter(termFilterBuilder,
//                                idsFilterBuilder))).
//                execute().actionGet();
//        List<String> list = new ArrayList<>(response.getHits().getHits().length);
//        for (SearchHit searchHit : response.getHits()) {
//            list.add(searchHit.getId());
//        }
//        if(logger.isDebugEnabled()){
//            logger.debug("time spent on termFilter() for " + term + " = " + stopWatch+
//                    " There are "+list.size()+" matched docs");
//        }
//        return list;
//    }
/**
 * Uses the index as an inverted index: collects the ids of the hits that pass both a term
 * filter on {@code field} and the supplied filter query. The search is wrapped in a
 * constant-score query with score tracking switched off, so no relevance score is computed.
 *
 * @param field       field the term filter is built on
 * @param term        term to filter on (the earlier documentation describes it as stemmed)
 * @param filterQuery query source handed to {@code QueryBuilders.wrapperQuery} and AND-ed with
 *                    the term filter
 * @param size        value passed to {@code setSize()} on the search request
 * @return the id of every hit in the response, in the order the response lists them
 * @throws Exception declared by the signature; this body catches nothing
 */
public List<String> termFilter(String field, String term, String filterQuery, int size) throws Exception {
    // The timer only exists when debug logging is on.
    StopWatch timer = null;
    if (logger.isDebugEnabled()) {
        timer = new StopWatch();
        timer.start();
    }

    TermFilterBuilder byTerm = new TermFilterBuilder(field, term);

    // setSize() has a huge impact on performance, the smaller the faster
    SearchResponse searchResponse = client.prepareSearch(indexName)
            .setSize(size)
            .setHighlighterFilter(false)
            .setTrackScores(false)
            .setNoFields()
            .setExplain(false)
            .setFetchSource(false)
            .setQuery(QueryBuilders.constantScoreQuery(
                    FilterBuilders.andFilter(
                            byTerm,
                            FilterBuilders.queryFilter(QueryBuilders.wrapperQuery(filterQuery)))))
            .execute()
            .actionGet();

    SearchHit[] hits = searchResponse.getHits().getHits();
    List<String> matchedIds = new ArrayList<>(hits.length);
    for (SearchHit hit : hits) {
        matchedIds.add(hit.getId());
    }

    if (logger.isDebugEnabled()) {
        logger.debug("time spent on termFilter() for " + term + " = " + timer + " There are " + matchedIds.size() + " matched docs");
    }
    return matchedIds;
}
Also used : SearchHit(org.elasticsearch.search.SearchHit) StopWatch(org.apache.commons.lang3.time.StopWatch) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 72 with SearchHit

use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.

the class FeatureLoader method loadNgramFeatureTFIFL.

/**
 * Fills one feature column with "term frequency inverse field length" values: for every hit
 * returned by {@code index.spanNearFrequency}, the hit score divided by the length of the
 * feature's field in that document.
 * <p>
 * The length is read from a field named after the feature's field with the suffix
 * {@code _field_length}; e.g. the length of the {@code body} field must be stored in
 * {@code body_field_length}.
 *
 * @param index        index that is searched and from which the length field is read
 * @param dataSet      data set whose feature values are written
 * @param feature      n-gram feature; supplies the column index and the field name
 * @param idTranslator maps index document ids to data set row ids; {@code numData()} is passed
 *                     as the last argument of the search
 * @param docFilter    document filter forwarded unchanged to the search
 */
private static void loadNgramFeatureTFIFL(ESIndex index, DataSet dataSet, Ngram feature, IdTranslator idTranslator, String docFilter) {
    final int column = feature.getIndex();
    final SearchHit[] matches = index.spanNearFrequency(feature, docFilter, idTranslator.numData()).getHits().getHits();
    final String lengthField = feature.getField() + "_" + "field_length";
    for (int i = 0; i < matches.length; i++) {
        final SearchHit match = matches[i];
        final String docId = match.getId();
        final float rawScore = match.getScore();
        final float fieldLength = index.getFloatField(docId, lengthField);
        // The division is carried out on floats; the result is widened to double afterwards.
        final double normalized = rawScore / fieldLength;
        dataSet.setFeatureValue(idTranslator.toIntId(docId), column, normalized);
    }
}
Also used : SearchHit(org.elasticsearch.search.SearchHit) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 73 with SearchHit

use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.

the class FeatureLoader method loadNgramFeatureFrequency.

/**
 * Fills one feature column with the scores of the hits returned by
 * {@code index.spanNearFrequency} for the given n-gram.
 *
 * @param index        index that is searched
 * @param dataSet      data set whose feature values are written
 * @param feature      n-gram feature; supplies the column index
 * @param idTranslator maps index document ids to data set row ids; {@code numData()} is passed
 *                     as the last argument of the search
 * @param docFilter    document filter forwarded unchanged to the search
 */
private static void loadNgramFeatureFrequency(ESIndex index, DataSet dataSet, Ngram feature, IdTranslator idTranslator, String docFilter) {
    final int column = feature.getIndex();
    final SearchHit[] matches = index.spanNearFrequency(feature, docFilter, idTranslator.numData()).getHits().getHits();
    for (int i = 0; i < matches.length; i++) {
        final SearchHit match = matches[i];
        final String docId = match.getId();
        final float frequency = match.getScore();
        dataSet.setFeatureValue(idTranslator.toIntId(docId), column, frequency);
    }
}
Also used : SearchHit(org.elasticsearch.search.SearchHit) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 74 with SearchHit

use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.

the class FeatureLoader method loadNgramFeatureOriginal.

/**
 * Fills one feature column with the scores of the hits returned by {@code index.spanNear}
 * for the given n-gram.
 *
 * @param index        index that is searched
 * @param dataSet      data set whose feature values are written
 * @param feature      n-gram feature; supplies the column index
 * @param idTranslator maps index document ids to data set row ids; {@code numData()} is passed
 *                     as the last argument of the search
 * @param docFilter    document filter forwarded unchanged to the search
 */
private static void loadNgramFeatureOriginal(ESIndex index, DataSet dataSet, Ngram feature, IdTranslator idTranslator, String docFilter) {
    final int column = feature.getIndex();
    final SearchHit[] matches = index.spanNear(feature, docFilter, idTranslator.numData()).getHits().getHits();
    for (int i = 0; i < matches.length; i++) {
        final SearchHit match = matches[i];
        final String docId = match.getId();
        final float hitScore = match.getScore();
        dataSet.setFeatureValue(idTranslator.toIntId(docId), column, hitScore);
    }
}
Also used : SearchHit(org.elasticsearch.search.SearchHit) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 75 with SearchHit

use of org.elasticsearch.search.SearchHit in project pyramid by cheng-li.

the class ESIndexTest method test19.

/**
 * Manual smoke test for {@code ESIndex.minimumShouldMatch}: runs the query for four terms
 * against the {@code body} field of the {@code ohsumed_20000} index, then prints the total hit
 * count followed by the id and score of each returned hit.
 * <p>
 * The index is closed in a {@code finally} block so that it is released even when the query
 * or the printing fails.
 *
 * @throws Exception if building the index, running the query or closing the index fails
 */
static void test19() throws Exception {
    ESIndex index = new ESIndex.Builder().setClientType("node").setIndexName("ohsumed_20000").build();
    try {
        List<String> terms = new ArrayList<>();
        terms.add("repeated");
        terms.add("cyclophosphamide");
        terms.add("cycles");
        terms.add("study");
        // NOTE(review): 70 is presumably the minimum-should-match percentage -- confirm against ESIndex.
        SearchResponse response = index.minimumShouldMatch(terms, "body", 70);
        System.out.println(response.getHits().getTotalHits());
        for (SearchHit searchHit : response.getHits()) {
            System.out.println(searchHit.getId() + " " + searchHit.getScore());
        }
    } finally {
        // Previously skipped whenever anything above threw, leaving the index open.
        index.close();
    }
}
Also used : SearchHit(org.elasticsearch.search.SearchHit) ArrayList(java.util.ArrayList) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Aggregations

SearchHit (org.elasticsearch.search.SearchHit) 166; SearchResponse (org.elasticsearch.action.search.SearchResponse) 114; SearchHits (org.elasticsearch.search.SearchHits) 52; ArrayList (java.util.ArrayList) 31; ElasticsearchAssertions.assertSearchResponse (org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertSearchResponse) 25; SearchRequestBuilder (org.elasticsearch.action.search.SearchRequestBuilder) 22; IOException (java.io.IOException) 20; Matchers.containsString (org.hamcrest.Matchers.containsString) 17; IndexRequestBuilder (org.elasticsearch.action.index.IndexRequestBuilder) 16; HashMap (java.util.HashMap) 15; ScoreDoc (org.apache.lucene.search.ScoreDoc) 14; SearchHitField (org.elasticsearch.search.SearchHitField) 14; HashSet (java.util.HashSet) 13; Map (java.util.Map) 12; Test (org.junit.Test) 12; AtomicReference (java.util.concurrent.atomic.AtomicReference) 10; TopDocs (org.apache.lucene.search.TopDocs) 10; Text (org.elasticsearch.common.text.Text) 10; BulkRequestBuilder (org.elasticsearch.action.bulk.BulkRequestBuilder) 9; BulkResponse (org.elasticsearch.action.bulk.BulkResponse) 9