Search in sources :

Example 1 with CandidatePhrase

Use of edu.stanford.nlp.patterns.CandidatePhrase in the CoreNLP project by stanfordnlp.

The method getDatum of the class LearnImportantFeatures.

/**
 * Creates the feature datum for the token at index {@code i} of {@code sent}.
 *
 * <p>The datum's label is {@code answerLabel} when the string form of the token's
 * {@code answerClass} annotation equals {@code answerLabel}, and {@code "O"} otherwise.
 * One {@code "Cluster-<id>"} feature with value 1.0 is set for every phrase matched on
 * the token; a phrase that has no entry in {@code clusterIds} gets the id -1. When the
 * token carries no matched-phrases annotation, the token's own word is used as its
 * single phrase.
 *
 * <p>Neighbouring-token features (word, lemma, tag) are governed by a local window
 * size that is currently 0, so no PREV-/NEXT- features are emitted.
 *
 * @param sent the tokens of the sentence
 * @param i index into {@code sent} of the token to featurize
 * @return a datum pairing the collected feature counts with the label
 */
private RVFDatum<String, String> getDatum(CoreLabel[] sent, int i) {
    final CoreLabel token = sent[i];
    final boolean isAnswer = token.get(answerClass).toString().equals(answerLabel);
    final String label = isAnswer ? answerLabel : "O";

    // Fall back to the token's own word when nothing was matched on it.
    CollectionValuedMap<String, CandidatePhrase> phrases = token.get(PatternsAnnotations.MatchedPhrases.class);
    if (phrases == null) {
        phrases = new CollectionValuedMap<>();
        phrases.add(label, CandidatePhrase.createOrGet(token.word()));
    }

    final Counter<String> features = new ClassicCounter<>();
    for (CandidatePhrase phrase : phrases.allValues()) {
        final Integer knownId = this.clusterIds.get(phrase.getPhrase());
        final int clusterId = (knownId == null) ? -1 : knownId;
        features.setCount("Cluster-" + clusterId, 1.0);
    }

    // Features on the token's own word, lemma and tag are disabled here.

    // Number of tokens on each side that contribute context features.
    int window = 0;

    final int firstPrev = Math.max(0, i - window);
    for (int p = firstPrev; p < i; p++) {
        final CoreLabel before = sent[p];
        features.incrementCount("PREV-" + "WORD-" + before.word());
        features.incrementCount("PREV-" + "LEMMA-" + before.lemma());
        features.incrementCount("PREV-" + "TAG-" + before.tag());
    }

    final int lastNext = Math.min(sent.length - 1, i + window);
    for (int n = i + 1; n <= lastNext; n++) {
        final CoreLabel after = sent[n];
        features.incrementCount("NEXT-" + "WORD-" + after.word());
        features.incrementCount("NEXT-" + "LEMMA-" + after.lemma());
        features.incrementCount("NEXT-" + "TAG-" + after.tag());
    }

    return new RVFDatum<>(features, label);
}
Also used: CoreLabel (edu.stanford.nlp.ling.CoreLabel), ClassicCounter (edu.stanford.nlp.stats.ClassicCounter), RVFDatum (edu.stanford.nlp.ling.RVFDatum), PatternsAnnotations (edu.stanford.nlp.patterns.PatternsAnnotations), CandidatePhrase (edu.stanford.nlp.patterns.CandidatePhrase)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel): 1, RVFDatum (edu.stanford.nlp.ling.RVFDatum): 1, CandidatePhrase (edu.stanford.nlp.patterns.CandidatePhrase): 1, PatternsAnnotations (edu.stanford.nlp.patterns.PatternsAnnotations): 1, ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 1