use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class ESIndex method spanNot.
/**
 * Runs a span-not query that is filtered to the given document ids.
 *
 * <p>One span-near query is built from the terms of {@code ngram.getInclude()} and another
 * from the terms of {@code ngram.getExclude()}; each uses the field, slop and in-order flag
 * of its own ngram. The two become the include and exclude parts of a span-not query, which
 * is wrapped in a filtered query together with an ids filter on {@code documentType}.
 *
 * @param ngram supplies the include ngram and the exclude ngram
 * @param ids   document ids the search is filtered to; the array length is also used as the
 *              requested result size
 * @return the response of the executed search request
 */
public SearchResponse spanNot(SpanNotNgram ngram, String[] ids) {
    // span-near query over the terms that have to match
    Ngram includeNgram = ngram.getInclude();
    String includeField = includeNgram.getField();
    SpanNearQueryBuilder includeQuery = QueryBuilders.spanNearQuery();
    includeQuery.slop(includeNgram.getSlop());
    includeQuery.inOrder(includeNgram.isInOrder());
    for (String includeTerm : includeNgram.getTerms()) {
        includeQuery.clause(new SpanTermQueryBuilder(includeField, includeTerm));
    }

    // span-near query over the terms that are excluded
    Ngram excludeNgram = ngram.getExclude();
    String excludeField = excludeNgram.getField();
    SpanNearQueryBuilder excludeQuery = QueryBuilders.spanNearQuery();
    excludeQuery.slop(excludeNgram.getSlop());
    excludeQuery.inOrder(excludeNgram.isInOrder());
    for (String excludeTerm : excludeNgram.getTerms()) {
        excludeQuery.clause(new SpanTermQueryBuilder(excludeField, excludeTerm));
    }

    // pre/post are read here but not applied to the query yet (see the todo below)
    int pre = ngram.getPre();
    int post = ngram.getPost();
    SpanNotQueryBuilder spanNotQuery = QueryBuilders.spanNotQuery()
            .include(includeQuery)
            .exclude(excludeQuery);
    //todo upgrade to 1.5
    // .pre(pre).post(post);

    IdsFilterBuilder idsFilter = new IdsFilterBuilder(documentType);
    idsFilter.addIds(ids);

    return client.prepareSearch(indexName)
            .setSize(ids.length)
            .setHighlighterFilter(false)
            .setTrackScores(false)
            .setNoFields()
            .setExplain(false)
            .setFetchSource(false)
            .setQuery(QueryBuilders.filteredQuery(spanNotQuery, idsFilter))
            .execute()
            .actionGet();
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class ESIndex method analyze.
/**
 * Analyzes the given text with the named analyzer and wraps the result in an ngram.
 *
 * <p>An analyze request is issued against {@code indexName}; the terms of the returned
 * tokens are concatenated in the order returned, separated by single spaces, and the
 * resulting string is stored on a new {@link Ngram} via {@code setNgram}.
 *
 * @param text     the text to analyze
 * @param analyzer the analyzer name passed to the analyze request
 * @return a new ngram whose ngram string is the space-joined token terms
 */
public Ngram analyze(String text, String analyzer) {
    List<AnalyzeResponse.AnalyzeToken> analyzed = client.admin()
            .indices()
            .prepareAnalyze(indexName, text)
            .setAnalyzer(analyzer)
            .get()
            .getTokens();

    StringBuilder joined = new StringBuilder();
    // empty before the first term, a single space before every later term
    String separator = "";
    for (AnalyzeResponse.AnalyzeToken analyzedToken : analyzed) {
        joined.append(separator).append(analyzedToken.getTerm());
        separator = " ";
    }

    Ngram result = new Ngram();
    result.setNgram(joined.toString());
    return result;
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class ESIndex method spanNot.
/**
 * Runs a span-not query without any id restriction.
 *
 * <p>One span-near query is built from the terms of {@code ngram.getInclude()} and another
 * from the terms of {@code ngram.getExclude()}; each uses the field, slop and in-order flag
 * of its own ngram. The two become the include and exclude parts of the span-not query that
 * is sent as the search query. The requested result size is {@code this.numDocs}.
 *
 * @param ngram supplies the include ngram and the exclude ngram
 * @return the response of the executed search request
 */
public SearchResponse spanNot(SpanNotNgram ngram) {
    // span-near query over the terms that have to match
    Ngram includeNgram = ngram.getInclude();
    String includeField = includeNgram.getField();
    SpanNearQueryBuilder includeQuery = QueryBuilders.spanNearQuery();
    includeQuery.slop(includeNgram.getSlop());
    includeQuery.inOrder(includeNgram.isInOrder());
    for (String includeTerm : includeNgram.getTerms()) {
        includeQuery.clause(new SpanTermQueryBuilder(includeField, includeTerm));
    }

    // span-near query over the terms that are excluded
    Ngram excludeNgram = ngram.getExclude();
    String excludeField = excludeNgram.getField();
    SpanNearQueryBuilder excludeQuery = QueryBuilders.spanNearQuery();
    excludeQuery.slop(excludeNgram.getSlop());
    excludeQuery.inOrder(excludeNgram.isInOrder());
    for (String excludeTerm : excludeNgram.getTerms()) {
        excludeQuery.clause(new SpanTermQueryBuilder(excludeField, excludeTerm));
    }

    // pre/post are read here but not applied to the query yet (see the todo below)
    int pre = ngram.getPre();
    int post = ngram.getPost();
    SpanNotQueryBuilder spanNotQuery = QueryBuilders.spanNotQuery()
            .include(includeQuery)
            .exclude(excludeQuery);
    //todo: upgrade to 1.5
    // .pre(pre).post(post);

    return client.prepareSearch(indexName)
            .setSize(this.numDocs)
            .setHighlighterFilter(false)
            .setTrackScores(false)
            .setNoFields()
            .setExplain(false)
            .setFetchSource(false)
            .setQuery(spanNotQuery)
            .execute()
            .actionGet();
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class NgramEnumerator method add.
// Slides the offset template over the tokens of one document, builds an ngram at every
// start position where the template fits, and adds the document's ngrams to the multiset.
// each ngram is counted only once in each doc
private static void add(List<String> source, Multiset<Ngram> multiset, String field, int slop, List<Integer> template) {
    // per-document collection; setCount(.., 1) pins an ngram's count to 1 however often it recurs
    Multiset<Ngram> seenInDoc = ConcurrentHashMultiset.create();
    int numTokens = source.size();
    for (int start = 0; start < numTokens; start++) {
        // only the last template offset is checked against the end of the token list
        // (presumably offsets are ascending, so it is the largest -- TODO confirm)
        int lastOffset = template.get(template.size() - 1);
        if (start + lastOffset >= numTokens) {
            continue;
        }

        List<String> terms = new ArrayList<>();
        for (int offset : template) {
            terms.add(source.get(start + offset));
        }

        Ngram candidate = new Ngram();
        candidate.setNgram(Ngram.toNgramString(terms));
        candidate.setField(field);
        candidate.setSlop(slop);
        candidate.setInOrder(true);
        seenInDoc.setCount(candidate, 1);
    }
    multiset.addAll(seenInDoc);
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class StumpSelector method scores.
/**
 * Computes one score per label row for a single ngram feature.
 *
 * <p>The feature is deep-copied so that setting its index to 0 does not touch the caller's
 * instance. The copy is loaded as the only feature of a fresh data set with
 * {@code labels[0].length} data points, and {@code score(dataSet, labels[l])} is evaluated
 * for every row {@code l} of {@code labels}.
 *
 * @param index          index passed to the feature loader
 * @param labels         size = num labels * num data; must contain at least one row because
 *                       {@code labels[0]} is read to size the data set
 * @param feature        the ngram feature to score; it is copied, not modified
 * @param idTranslator   passed to the feature loader
 * @param matchScoreType passed to the feature loader
 * @param docFilter      passed to the feature loader
 * @return an array with {@code labels.length} entries, one score per label row
 * @throws IllegalStateException if the feature cannot be deep-copied
 */
public static double[] scores(ESIndex index, double[][] labels, Ngram feature, IdTranslator idTranslator, FeatureLoader.MatchScoreType matchScoreType, String docFilter) {
    Ngram ngram;
    try {
        ngram = (Ngram) Serialization.deepCopy(feature);
    } catch (IOException | ClassNotFoundException e) {
        // Fail fast with the real cause instead of printing the trace and hitting a
        // NullPointerException on the next line.
        throw new IllegalStateException("failed to deep-copy ngram feature " + feature, e);
    }
    // the data set built below has exactly one feature column, so the copy is placed at index 0
    ngram.setIndex(0);
    DataSet dataSet = DataSetBuilder.getBuilder().numDataPoints(labels[0].length).numFeatures(1).build();
    FeatureLoader.loadNgramFeature(index, dataSet, ngram, idTranslator, matchScoreType, docFilter);
    double[] scores = new double[labels.length];
    for (int l = 0; l < scores.length; l++) {
        scores[l] = score(dataSet, labels[l]);
    }
    return scores;
}
Aggregations