Search in sources :

Example 6 with Ngram

use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.

the class ESIndex method spanNot.

/**
 * Runs a span-not search restricted to the given document ids.
 *
 * A span-near query is built from the terms of the include ngram and another
 * from the terms of the exclude ngram (each with its own field, slop and
 * in-order flag); the two are combined into a span-not query and filtered by
 * an ids filter on this index's document type.
 *
 * @param ngram supplies the include and exclude ngrams
 * @param ids ids of the documents the search is restricted to; the result size is capped at ids.length
 * @return the raw search response (no fields, no source, no score tracking, no explanation)
 */
public SearchResponse spanNot(SpanNotNgram ngram, String[] ids) {
    // span-near query over the terms that must match
    Ngram included = ngram.getInclude();
    String includeField = included.getField();
    SpanNearQueryBuilder includeQuery = QueryBuilders.spanNearQuery();
    for (String includeTerm : included.getTerms()) {
        includeQuery.clause(new SpanTermQueryBuilder(includeField, includeTerm));
    }
    includeQuery.inOrder(included.isInOrder());
    includeQuery.slop(included.getSlop());

    // span-near query over the terms that are passed as the exclude side
    Ngram excluded = ngram.getExclude();
    String excludeField = excluded.getField();
    SpanNearQueryBuilder excludeQuery = QueryBuilders.spanNearQuery();
    for (String excludeTerm : excluded.getTerms()) {
        excludeQuery.clause(new SpanTermQueryBuilder(excludeField, excludeTerm));
    }
    excludeQuery.inOrder(excluded.isInOrder());
    excludeQuery.slop(excluded.getSlop());

    // read but not applied yet, see the TODO below
    int pre = ngram.getPre();
    int post = ngram.getPost();
    // TODO: upgrade to 1.5, then chain .pre(pre).post(post) onto the span-not query
    SpanNotQueryBuilder spanNotQuery = QueryBuilders.spanNotQuery()
            .include(includeQuery)
            .exclude(excludeQuery);

    IdsFilterBuilder idsFilter = new IdsFilterBuilder(documentType);
    idsFilter.addIds(ids);

    return client.prepareSearch(indexName)
            .setSize(ids.length)
            .setHighlighterFilter(false)
            .setTrackScores(false)
            .setNoFields()
            .setExplain(false)
            .setFetchSource(false)
            .setQuery(QueryBuilders.filteredQuery(spanNotQuery, idsFilter))
            .execute()
            .actionGet();
}
Also used : SpanNotNgram(edu.neu.ccs.pyramid.feature.SpanNotNgram) Ngram(edu.neu.ccs.pyramid.feature.Ngram) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 7 with Ngram

use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.

the class ESIndex method analyze.

/**
 * Runs the given text through the named analyzer of this index and packs the
 * resulting terms into a new ngram.
 *
 * @param text the text to analyze
 * @param analyzer name of the analyzer to apply
 * @return a new Ngram whose ngram string is the analyzed terms joined by single spaces
 */
public Ngram analyze(String text, String analyzer) {
    List<AnalyzeResponse.AnalyzeToken> analyzed = client.admin()
            .indices()
            .prepareAnalyze(indexName, text)
            .setAnalyzer(analyzer)
            .get()
            .getTokens();

    StringBuilder joined = new StringBuilder();
    // empty before the first term, a single space before every later one
    String separator = "";
    for (AnalyzeResponse.AnalyzeToken analyzedToken : analyzed) {
        joined.append(separator).append(analyzedToken.getTerm());
        separator = " ";
    }

    Ngram result = new Ngram();
    result.setNgram(joined.toString());
    return result;
}
Also used : SpanNotNgram(edu.neu.ccs.pyramid.feature.SpanNotNgram) Ngram(edu.neu.ccs.pyramid.feature.Ngram) AnalyzeResponse(org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)

Example 8 with Ngram

use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.

the class ESIndex method spanNot.

/**
 * Runs a span-not search over the index, without any id restriction.
 *
 * A span-near query is built from the terms of the include ngram and another
 * from the terms of the exclude ngram (each with its own field, slop and
 * in-order flag); the two are combined into a span-not query.
 *
 * @param ngram supplies the include and exclude ngrams
 * @return the raw search response, sized to this.numDocs (no fields, no source, no score tracking, no explanation)
 */
public SearchResponse spanNot(SpanNotNgram ngram) {
    // span-near query over the terms that must match
    Ngram included = ngram.getInclude();
    String includeField = included.getField();
    SpanNearQueryBuilder includeQuery = QueryBuilders.spanNearQuery();
    for (String includeTerm : included.getTerms()) {
        includeQuery.clause(new SpanTermQueryBuilder(includeField, includeTerm));
    }
    includeQuery.inOrder(included.isInOrder());
    includeQuery.slop(included.getSlop());

    // span-near query over the terms that are passed as the exclude side
    Ngram excluded = ngram.getExclude();
    String excludeField = excluded.getField();
    SpanNearQueryBuilder excludeQuery = QueryBuilders.spanNearQuery();
    for (String excludeTerm : excluded.getTerms()) {
        excludeQuery.clause(new SpanTermQueryBuilder(excludeField, excludeTerm));
    }
    excludeQuery.inOrder(excluded.isInOrder());
    excludeQuery.slop(excluded.getSlop());

    // read but not applied yet, see the TODO below
    int pre = ngram.getPre();
    int post = ngram.getPost();
    // TODO: upgrade to 1.5, then chain .pre(pre).post(post) onto the span-not query
    SpanNotQueryBuilder spanNotQuery = QueryBuilders.spanNotQuery()
            .include(includeQuery)
            .exclude(excludeQuery);

    return client.prepareSearch(indexName)
            .setSize(this.numDocs)
            .setHighlighterFilter(false)
            .setTrackScores(false)
            .setNoFields()
            .setExplain(false)
            .setFetchSource(false)
            .setQuery(spanNotQuery)
            .execute()
            .actionGet();
}
Also used : SpanNotNgram(edu.neu.ccs.pyramid.feature.SpanNotNgram) Ngram(edu.neu.ccs.pyramid.feature.Ngram) SearchResponse(org.elasticsearch.action.search.SearchResponse)

Example 9 with Ngram

use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.

the class NgramEnumerator method add.

// Collects the ngrams produced by sliding the template over one document's
// tokens and adds them to the shared multiset; each distinct ngram is counted
// only once per doc.
private static void add(List<String> source, Multiset<Ngram> multiset, String field, int slop, List<Integer> template) {
    Multiset<Ngram> seenInDoc = ConcurrentHashMultiset.create();
    int tokenCount = source.size();
    for (int start = 0; start < tokenCount; start++) {
        int lastOffset = template.get(template.size() - 1);
        if (start + lastOffset >= tokenCount) {
            // the last template offset would index past the end of source
            continue;
        }
        List<String> picked = new ArrayList<>();
        for (int offset : template) {
            picked.add(source.get(start + offset));
        }
        Ngram candidate = new Ngram();
        candidate.setNgram(Ngram.toNgramString(picked));
        candidate.setSlop(slop);
        candidate.setField(field);
        candidate.setInOrder(true);
        // setCount (rather than add) pins the per-document count at one
        seenInDoc.setCount(candidate, 1);
    }
    multiset.addAll(seenInDoc);
}
Also used : Ngram(edu.neu.ccs.pyramid.feature.Ngram)

Example 10 with Ngram

use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.

the class StumpSelector method scores.

/**
 * Scores one ngram feature against every row of labels.
 *
 * The feature is deep-copied, the copy's index is set to 0, and it is loaded
 * as the only column of a fresh single-feature data set; that data set is
 * then scored against each label row in turn.
 *
 * @param index the index the feature values are loaded from
 * @param labels size = num labels * num data
 * @param feature the ngram to score; it is copied, not modified
 * @param idTranslator passed through to the feature loader
 * @param matchScoreType passed through to the feature loader
 * @param docFilter passed through to the feature loader
 * @return one score per row of labels, in the same order
 * @throws IllegalStateException if the feature cannot be deep-copied
 */
public static double[] scores(ESIndex index, double[][] labels, Ngram feature, IdTranslator idTranslator, FeatureLoader.MatchScoreType matchScoreType, String docFilter) {
    Ngram ngram;
    try {
        ngram = (Ngram) Serialization.deepCopy(feature);
    } catch (IOException | ClassNotFoundException e) {
        // Fail here with the real cause instead of printing the stack trace
        // and hitting a NullPointerException on the next line.
        throw new IllegalStateException("failed to deep-copy ngram feature", e);
    }
    // index 0 because the data set below has exactly one feature column
    ngram.setIndex(0);
    DataSet dataSet = DataSetBuilder.getBuilder().numDataPoints(labels[0].length).numFeatures(1).build();
    FeatureLoader.loadNgramFeature(index, dataSet, ngram, idTranslator, matchScoreType, docFilter);
    double[] scores = new double[labels.length];
    for (int l = 0; l < scores.length; l++) {
        scores[l] = score(dataSet, labels[l]);
    }
    return scores;
}
Also used : DataSet(edu.neu.ccs.pyramid.dataset.DataSet) IOException(java.io.IOException) Ngram(edu.neu.ccs.pyramid.feature.Ngram)

Aggregations

Ngram (edu.neu.ccs.pyramid.feature.Ngram)10 SpanNotNgram (edu.neu.ccs.pyramid.feature.SpanNotNgram)6 SearchResponse (org.elasticsearch.action.search.SearchResponse)4 ConcurrentHashMultiset (com.google.common.collect.ConcurrentHashMultiset)1 Multiset (com.google.common.collect.Multiset)1 DataSet (edu.neu.ccs.pyramid.dataset.DataSet)1 IOException (java.io.IOException)1 ArrayList (java.util.ArrayList)1 AnalyzeResponse (org.elasticsearch.action.admin.indices.analyze.AnalyzeResponse)1 SearchHit (org.elasticsearch.search.SearchHit)1