Use of edu.stanford.nlp.patterns.CandidatePhrase in the CoreNLP project by stanfordnlp.
The getDatum method of the LearnImportantFeatures class.
/**
 * Builds the labelled feature datum for the token at position {@code i} of {@code sent}.
 *
 * <p>The label is {@code answerLabel} when the string form of the token's {@code answerClass}
 * annotation equals {@code answerLabel}, and {@code "O"} otherwise.
 *
 * <p>Features: one {@code "Cluster-<id>"} entry (count 1.0) for every phrase in the token's
 * {@code MatchedPhrases} annotation, where the id is looked up in {@code clusterIds} and is
 * {@code -1} for phrases missing from that map. If the token has no {@code MatchedPhrases}
 * annotation, the token's own word is used as the single phrase. Neighbouring-token features
 * ({@code PREV-}/{@code NEXT-} word, lemma and tag) are added for tokens within {@code window}
 * positions; the window is currently 0, so no such features are produced.
 *
 * @param sent tokens of the sentence
 * @param i index into {@code sent} of the token to featurize
 * @return datum pairing the feature counter with the label
 */
private RVFDatum<String, String> getDatum(CoreLabel[] sent, int i) {
  final CoreLabel token = sent[i];
  final String label = token.get(answerClass).toString().equals(answerLabel) ? answerLabel : "O";

  // Fall back to the token's own word when no phrases were matched for it.
  CollectionValuedMap<String, CandidatePhrase> phrasesByLabel =
      token.get(PatternsAnnotations.MatchedPhrases.class);
  if (phrasesByLabel == null) {
    phrasesByLabel = new CollectionValuedMap<>();
    phrasesByLabel.add(label, CandidatePhrase.createOrGet(token.word()));
  }

  final Counter<String> features = new ClassicCounter<>();
  for (CandidatePhrase phrase : phrasesByLabel.allValues()) {
    final Integer clusterId = this.clusterIds.get(phrase.getPhrase());
    // Phrases without a known cluster share the "Cluster--1" feature.
    features.setCount("Cluster-" + (clusterId == null ? -1 : clusterId), 1.0);
  }

  // Word/lemma/tag features of the token itself are intentionally not emitted here.

  // Number of neighbouring tokens on each side that contribute context features.
  final int window = 0;

  final int firstPrev = Math.max(0, i - window);
  for (int p = firstPrev; p < i; p++) {
    final CoreLabel prev = sent[p];
    features.incrementCount("PREV-WORD-" + prev.word());
    features.incrementCount("PREV-LEMMA-" + prev.lemma());
    features.incrementCount("PREV-TAG-" + prev.tag());
  }

  final int lastNext = Math.min(sent.length - 1, i + window);
  for (int q = i + 1; q <= lastNext; q++) {
    final CoreLabel next = sent[q];
    features.incrementCount("NEXT-WORD-" + next.word());
    features.incrementCount("NEXT-LEMMA-" + next.lemma());
    features.incrementCount("NEXT-TAG-" + next.tag());
  }

  return new RVFDatum<>(features, label);
}
Aggregations