Usage of edu.neu.ccs.pyramid.elasticsearch.ESIndex in the project pyramid by cheng-li.
Class App1, method keywordsFilter.
// public static String splitListToString(List<String> splitValues){
// String splitValueAll = "";
// for (int i=0;i<splitValues.size();i++){
// splitValueAll = splitValueAll+splitValues.get(i);
// if (i<splitValues.size()-1){
// splitValueAll = splitValueAll+"_";
// }
// }
// return splitValueAll;
// }
/**
 * Restricts a set of candidate ngrams to those related to an external keyword list.
 *
 * <p>The keyword file path is read from the config key
 * {@code train.feature.filterNgrams.keyWordsFile}. Every line of that file is run through
 * {@code index.analyze} with the analyzer named by {@code train.feature.analyzer}, and the
 * resulting ngram string is collected as a keyword.
 *
 * <p>Unigram candidates ({@code getN() == 1}) are always kept; any other candidate is kept
 * only when {@code containsKeyWords} accepts it against the collected keywords.
 *
 * @param config configuration supplying the keyword file path and the analyzer name
 * @param index index used to analyze each keyword line
 * @param ngrams candidate ngrams to filter
 * @return a new set holding the candidates that passed the filter
 * @throws IOException if the keyword file cannot be read
 */
private static Set<Ngram> keywordsFilter(Config config, ESIndex index, Set<Ngram> ngrams) throws IOException {
    String keywordsPath = config.getString("train.feature.filterNgrams.keyWordsFile");
    List<String> rawKeywordLines = FileUtils.readLines(new File(keywordsPath));
    String analyzerName = config.getString("train.feature.analyzer");

    // Normalize every keyword line with the same analyzer that is configured for features.
    Set<String> analyzedKeywords = new HashSet<>();
    for (int i = 0; i < rawKeywordLines.size(); i++) {
        String rawLine = rawKeywordLines.get(i);
        analyzedKeywords.add(index.analyze(rawLine, analyzerName).getNgram());
    }

    return ngrams.stream()
            .parallel()
            .filter(candidate -> {
                // Unigrams bypass the keyword check entirely.
                if (candidate.getN() == 1) {
                    return true;
                }
                return containsKeyWords(candidate, analyzedKeywords);
            })
            .collect(Collectors.toSet());
}
Usage of edu.neu.ccs.pyramid.elasticsearch.ESIndex in the project pyramid by cheng-li.
Class IndexChecker, method main.
/**
 * Command-line entry point: loads the index described by a properties file and runs
 * {@code check} and then {@code checkEmpty} on every field listed under the config key
 * {@code fieldsToCheck}.
 *
 * <p>All {@code check} calls complete before the first {@code checkEmpty} call, matching
 * the original two-pass order.
 *
 * @param args exactly one element: the path of the properties file
 * @throws IllegalArgumentException if {@code args} does not contain exactly one element
 * @throws Exception if loading the config or index, or any of the checks, fails
 */
public static void main(String[] args) throws Exception {
    if (args.length != 1) {
        throw new IllegalArgumentException("Please specify a properties file.");
    }
    Config config = new Config(args[0]);
    System.out.println(config);
    ESIndex index = loadIndex(config);
    try {
        List<String> fields = config.getStrings("fieldsToCheck");
        for (String field : fields) {
            check(index, field);
        }
        for (String field : fields) {
            checkEmpty(index, field);
        }
    } finally {
        // Close the index even when a check throws, so the handle is never leaked.
        index.close();
    }
}
Usage of edu.neu.ccs.pyramid.elasticsearch.ESIndex in the project pyramid by cheng-li.
Class IndexChecker, method loadIndex.
/**
 * Builds an {@code ESIndex} from the {@code index.*} entries of the given config and
 * prints its document count.
 *
 * <p>The index name, cluster name, client type and document type are always read. When the
 * client type equals {@code "transport"}, the comma-separated {@code index.hosts} and
 * {@code index.ports} values are split and handed to the builder as well.
 *
 * @param config configuration holding the {@code index.*} settings
 * @return the index produced by the builder
 * @throws Exception if building or querying the index fails
 */
static ESIndex loadIndex(Config config) throws Exception {
    ESIndex.Builder indexBuilder = new ESIndex.Builder()
            .setIndexName(config.getString("index.indexName"))
            .setClusterName(config.getString("index.clusterName"))
            .setClientType(config.getString("index.clientType"))
            .setDocumentType(config.getString("index.documentType"));

    boolean usesTransportClient = config.getString("index.clientType").equals("transport");
    if (usesTransportClient) {
        // Hosts and ports are stored as comma-separated lists; split on a literal comma.
        String commaDelimiter = Pattern.quote(",");
        String[] hostList = config.getString("index.hosts").split(commaDelimiter);
        String[] portList = config.getString("index.ports").split(commaDelimiter);
        indexBuilder.addHostsAndPorts(hostList, portList);
    }

    ESIndex loadedIndex = indexBuilder.build();
    System.out.println("index loaded");
    System.out.println("there are " + loadedIndex.getNumDocs() + " documents in the index.");
    return loadedIndex;
}
Aggregations