Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class SimpleSentiment, method classify:
/**
* @see SimpleSentiment#classify(CoreMap)
*/
/**
 * Annotate a raw text string and classify the sentiment of its first sentence.
 *
 * @see SimpleSentiment#classify(CoreMap)
 */
public SentimentClass classify(String text) {
  // Run the (lazily-held) pipeline over the raw text to obtain sentence annotations.
  Annotation annotation = new Annotation(text);
  pipeline.get().annotate(annotation);
  // Only the first sentence of the text is classified.
  CoreMap firstSentence = annotation.get(CoreAnnotations.SentencesAnnotation.class).get(0);
  // Featurize the sentence and hand the datum to the underlying classifier.
  RVFDatum<SentimentClass, String> datum = new RVFDatum<>(featurize(firstSentence));
  return impl.classOf(datum);
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class SentenceAlgorithms, method unescapeHTML:
/**
* A funky little helper method to interpret each token of the sentence as an HTML string, and translate it back to text.
* Note that this is <b>in place</b>.
*/
/**
 * A funky little helper method to interpret each token of the sentence as an HTML string, and translate it back to text.
 * Note that this is <b>in place</b>.
 */
public void unescapeHTML() {
  // First pass: rewrite the word/lemma fields on the underlying protobuf tokens.
  int numTokens = sentence.length();
  for (int idx = 0; idx < numTokens; idx++) {
    CoreNLPProtos.Token.Builder tok = sentence.rawToken(idx);
    tok.setWord(StringUtils.unescapeHtml3(tok.getWord()));
    tok.setLemma(StringUtils.unescapeHtml3(tok.getLemma()));
  }
  // Second pass: mirror the same change in the Annotation view of this sentence,
  // so the two representations stay consistent.
  CoreMap sentenceMap = sentence.document.asAnnotation()
      .get(CoreAnnotations.SentencesAnnotation.class)
      .get(sentence.sentenceIndex());
  for (CoreLabel label : sentenceMap.get(CoreAnnotations.TokensAnnotation.class)) {
    label.setWord(StringUtils.unescapeHtml3(label.word()));
    label.setLemma(StringUtils.unescapeHtml3(label.lemma()));
  }
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method removeSpuriousMentionsEn:
/**
 * Remove spurious English mentions from each sentence's predicted-mention list, in place.
 * A mention is dropped when its head is a non-word (e.g. "hmm"), when it is an
 * adjectival demonym that fails the head/next-token POS test, or when it is on the stop list.
 *
 * @param doc the annotated document whose sentences align with {@code predictedMentions}
 * @param predictedMentions one mention list per sentence; filtered in place
 * @param dict dictionaries providing non-word and demonym lookups
 */
protected void removeSpuriousMentionsEn(Annotation doc, List<List<Mention>> predictedMentions, Dictionaries dict) {
  List<CoreMap> sentences = doc.get(CoreAnnotations.SentencesAnnotation.class);
  for (int sentIdx = 0; sentIdx < predictedMentions.size(); sentIdx++) {
    List<CoreLabel> tokens = sentences.get(sentIdx).get(CoreAnnotations.TokensAnnotation.class);
    List<Mention> mentions = predictedMentions.get(sentIdx);
    // Collect doomed mentions in a set first; removing while iterating would break the loop.
    Set<Mention> toRemove = Generics.newHashSet();
    for (Mention mention : mentions) {
      String headPOS = mention.headWord.get(CoreAnnotations.PartOfSpeechAnnotation.class);
      // Non-words such as "hmm".
      if (dict.nonWords.contains(mention.headString)) {
        toRemove.add(mention);
      }
      // Adjectival demonyms: drop unless the head is nominal and the following token
      // (when one exists) is not nominal. NOTE(review): the upstream comment says
      // "next word is not noun" but the code drops when the next word IS a noun —
      // preserved as-is to keep behavior identical.
      if (dict.isAdjectivalDemonym(mention.spanToString())
          && (!headPOS.startsWith("N")
              || (mention.endIndex < tokens.size() && tokens.get(mention.endIndex).tag().startsWith("N")))) {
        toRemove.add(mention);
      }
      // Stop list (e.g., "U.S.", "there").
      if (inStopList(mention)) {
        toRemove.add(mention);
      }
    }
    mentions.removeAll(toRemove);
  }
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method addGoldMentions:
// temporary for debug
/**
 * Add every gold mention whose span is not already covered by a predicted mention.
 * Newly built mentions use a dummy id of -1 and prefer the enhanced dependency graph,
 * falling back to the basic one when no enhanced graph is present.
 *
 * @param sentences sentence-level annotations, parallel to the other three lists
 * @param mentionSpanSetList per-sentence sets of already-claimed (start, end) spans; updated in place
 * @param predictedMentions per-sentence predicted mention lists; appended to in place
 * @param allGoldMentions per-sentence gold mention lists to merge in
 */
protected static void addGoldMentions(List<CoreMap> sentences, List<Set<IntPair>> mentionSpanSetList, List<List<Mention>> predictedMentions, List<List<Mention>> allGoldMentions) {
  final int dummyMentionId = -1;  // placeholder id for debug-added mentions
  for (int sentIdx = 0, numSents = sentences.size(); sentIdx < numSents; sentIdx++) {
    CoreMap sentence = sentences.get(sentIdx);
    List<CoreLabel> tokens = sentence.get(TokensAnnotation.class);
    Set<IntPair> claimedSpans = mentionSpanSetList.get(sentIdx);
    List<Mention> predicted = predictedMentions.get(sentIdx);
    // Prefer the enhanced dependency graph; fall back to basic when it is absent.
    SemanticGraph basicGraph = sentence.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
    SemanticGraph enhancedGraph = sentence.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
    if (enhancedGraph == null) {
      enhancedGraph = basicGraph;
    }
    for (Mention gold : allGoldMentions.get(sentIdx)) {
      IntPair span = new IntPair(gold.startIndex, gold.endIndex);
      if (claimedSpans.contains(span)) {
        continue;  // a predicted mention already occupies this exact span
      }
      Mention added = new Mention(dummyMentionId, gold.startIndex, gold.endIndex, tokens,
          basicGraph, enhancedGraph,
          new ArrayList<>(tokens.subList(gold.startIndex, gold.endIndex)));
      predicted.add(added);
      claimedSpans.add(span);
    }
  }
}
Use of edu.stanford.nlp.util.CoreMap in the CoreNLP project by stanfordnlp.
From the class CorefMentionFinder, method extractEnumerations:
/**
 * Extract enumeration mentions (the two conjuncts matched as "m1" and "m2" by
 * {@code enumerationsMentionPattern}) from a sentence's parse tree, adding each
 * span that is neither already claimed nor inside a named-entity span.
 *
 * @param s the sentence annotation (tokens, tree, dependency graphs)
 * @param mentions output list; new mentions are appended in place
 * @param mentionSpanSet spans already claimed by other mentions; updated in place
 * @param namedEntitySpanSet named-entity spans that block enumeration mentions
 */
protected static void extractEnumerations(CoreMap s, List<Mention> mentions, Set<IntPair> mentionSpanSet, Set<IntPair> namedEntitySpanSet) {
  List<CoreLabel> tokens = s.get(CoreAnnotations.TokensAnnotation.class);
  Tree tree = s.get(TreeCoreAnnotations.TreeAnnotation.class);
  SemanticGraph basicDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  SemanticGraph enhancedDependency = s.get(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  if (enhancedDependency == null) {
    enhancedDependency = s.get(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  }
  Map<IntPair, Tree> spanToMentionSubTree = Generics.newHashMap();
  TregexMatcher matcher = enumerationsMentionPattern.matcher(tree);
  while (matcher.find()) {
    matcher.getMatch();
    // Both conjuncts of the enumeration are candidate mentions; record a token span for each.
    for (Tree conjunct : new Tree[] { matcher.getNode("m1"), matcher.getNode("m2") }) {
      List<Tree> leaves = conjunct.getLeaves();
      // Token indices are 1-based in IndexAnnotation; spans here are 0-based, end-exclusive.
      int beginIdx = ((CoreLabel) leaves.get(0).label()).get(CoreAnnotations.IndexAnnotation.class) - 1;
      int endIdx = ((CoreLabel) leaves.get(leaves.size() - 1).label()).get(CoreAnnotations.IndexAnnotation.class);
      spanToMentionSubTree.put(new IntPair(beginIdx, endIdx), conjunct);
    }
  }
  for (Map.Entry<IntPair, Tree> entry : spanToMentionSubTree.entrySet()) {
    IntPair span = entry.getKey();
    // Skip spans already claimed or nested inside a named entity.
    if (mentionSpanSet.contains(span) || insideNE(span, namedEntitySpanSet)) {
      continue;
    }
    int dummyMentionId = -1;
    Mention m = new Mention(dummyMentionId, span.get(0), span.get(1), tokens,
        basicDependency, enhancedDependency,
        new ArrayList<>(tokens.subList(span.get(0), span.get(1))), entry.getValue());
    mentions.add(m);
    mentionSpanSet.add(span);
  }
}
Aggregations