use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class NgramEnumerator method gatherNgram.
/**
 * Gathers ngrams for the given documents and keeps only those whose accumulated
 * count is at least {@code minDF}.
 *
 * <p>For every id, the term vector of the template's field is read from the index and
 * passed to {@code add(...)} together with a shared accumulator. The ids are processed
 * on a parallel stream, so the accumulator is a concurrent multiset.
 *
 * @param index index the term vectors are read from
 * @param ids ids of the documents to scan
 * @param template template that supplies the field name and is forwarded to {@code add(...)}
 * @param minDF minimum accumulated count an ngram must reach to be kept
 *     (presumably a document frequency, i.e. {@code add(...)} counts an ngram once
 *     per document -- TODO confirm against {@code add})
 * @return a new multiset containing each ngram with {@code count >= minDF}, with its count
 */
public static Multiset<Ngram> gatherNgram(ESIndex index, String[] ids, NgramTemplate template, int minDF) {
    Multiset<Ngram> multiset = ConcurrentHashMultiset.create();
    String field = template.getField();
    Arrays.stream(ids).parallel().forEach(id -> {
        Map<Integer, String> termVector = index.getTermVectorFromIndex(field, id);
        add(termVector, multiset, template);
    });
    Multiset<Ngram> filtered = ConcurrentHashMultiset.create();
    // Iterate with the parameterized entry type: no raw Multiset.Entry, no cast.
    for (Multiset.Entry<Ngram> entry : multiset.entrySet()) {
        int count = entry.getCount();
        if (count >= minDF) {
            filtered.add(entry.getElement(), count);
        }
    }
    return filtered;
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class ESIndexTest method test17.
/**
 * Calls {@code analyze} on the "imdb" index with a sample sentence and the
 * "my_analyzer" argument (presumably the analyzer name -- TODO confirm), then prints
 * the resulting {@link Ngram}.
 *
 * <p>The index is opened in try-with-resources so it is closed even when
 * {@code analyze} throws.
 *
 * @throws Exception if building, querying or closing the index fails
 */
static void test17() throws Exception {
    try (ESIndex index = new ESIndex.Builder().setClientType("node").setIndexName("imdb").build()) {
        Ngram ngram = index.analyze("Story of a man who has unnatural feelings for a pig man test", "my_analyzer");
        System.out.println(ngram);
    }
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class ESIndexTest method test21.
// static void test20() throws Exception{
// try(ESIndex index = new ESIndex.Builder().setClientType("node").setIndexName("ohsumed_20000")
// .build()){
// List<String> terms = new ArrayList<>();
// terms.add("repeated");
// terms.add("cyclophosphamide");
// terms.add("cycles");
// terms.add("study");
// String[] ids = {"AVYcLfPVDpWfZwAC_rp3", "AVYcLfbpDpWfZwAC_rt_"};
// SearchResponse response = index.minimumShouldMatch(terms, "body", 70, ids);
// System.out.println(response.getHits().getTotalHits());
// for (SearchHit searchHit : response.getHits()) {
// System.out.println(searchHit.getId()+" "+searchHit.getScore());
// }
// double a = 0/0;
// }
//
// }
/**
 * Runs a span-near search for the in-order ngram "really nice" (slop 0) on the "body"
 * field of the "imdb" index, passing a filter query on {@code split = train}, and
 * prints the id and score of every hit in the response.
 *
 * @throws Exception if building, querying or closing the index fails
 */
static void test21() throws Exception {
    final String trainSplitFilter =
            "{\"filtered\":{\"query\":{\"match_all\":{}},\"filter\":{\"term\":{\"split\":\"train\"}}}}";

    try (ESIndex esIndex = new ESIndex.Builder().setClientType("node").setIndexName("imdb").build()) {
        Ngram phrase = new Ngram();
        phrase.setInOrder(true);
        phrase.setNgram("really nice");
        phrase.setField("body");
        phrase.setSlop(0);

        // Third argument is forwarded as-is; presumably the number of hits requested -- TODO confirm.
        SearchResponse result = esIndex.spanNear(phrase, trainSplitFilter, 10);

        for (SearchHit hit : result.getHits()) {
            String line = hit.getId() + " " + hit.getScore();
            System.out.println(line);
        }
    }
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class ESIndexTest method test12.
/**
 * Builds a span-not query on the "imdb" index and prints the total number of hits.
 *
 * <p>The included ngram is "recommend" and the excluded ngram is "not", both in-order
 * with slop 0 on the "body" field; {@code pre} is set to 2 (presumably the number of
 * positions before the included span in which the excluded span may not occur --
 * TODO confirm against {@code SpanNotNgram}).
 *
 * <p>The index is opened in try-with-resources so it is closed even when the query
 * throws.
 *
 * @throws Exception if building, querying or closing the index fails
 */
static void test12() throws Exception {
    try (ESIndex index = new ESIndex.Builder().setClientType("node").setIndexName("imdb").build()) {
        Ngram ngram1 = new Ngram();
        ngram1.setInOrder(true);
        ngram1.setNgram("recommend");
        ngram1.setField("body");
        ngram1.setSlop(0);

        Ngram ngram2 = new Ngram();
        ngram2.setInOrder(true);
        ngram2.setNgram("not");
        ngram2.setField("body");
        ngram2.setSlop(0);

        SpanNotNgram spanNotNgram = new SpanNotNgram();
        spanNotNgram.setInclude(ngram1);
        spanNotNgram.setExclude(ngram2);
        spanNotNgram.setPre(2);

        SearchResponse response = index.spanNot(spanNotNgram);
        System.out.println(response.getHits().getTotalHits());
    }
}
use of edu.neu.ccs.pyramid.feature.Ngram in project pyramid by cheng-li.
the class BoundedBlockPriorityQueueTest method test2.
/**
 * Demo for {@code BoundedBlockPriorityQueue}: generates ten random doubles, adds one
 * (ngram, value) pair per value to a queue constructed with the arguments 3 and a
 * by-value comparator, adding from a parallel stream, and prints the raw values, the
 * sorted values and the queue so they can be compared by eye.
 */
private static void test2() {
    Comparator<Pair<Ngram, Double>> bySecond =
            Comparator.comparing((Pair<Ngram, Double> pair) -> pair.getSecond());
    BoundedBlockPriorityQueue<Pair<Ngram, Double>> queue = new BoundedBlockPriorityQueue<>(3, bySecond);

    List<Double> all = new ArrayList<>();
    while (all.size() < 10) {
        all.add(Math.random());
    }
    System.out.println(all);

    // Each value is wrapped with an ngram named after its position in the list;
    // insertions run concurrently on the parallel stream.
    IntStream.range(0, all.size()).parallel().forEach(position -> {
        Ngram label = new Ngram();
        label.setNgram("" + position);
        Double value = all.get(position);
        queue.add(new Pair<>(label, value));
    });

    Collections.sort(all);
    System.out.println("sorted = " + all);
    System.out.println(queue);
}
Aggregations