use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class NERServlet method outputHighlighting.
/**
 * Classifies {@code input} and prints it to {@code out} as HTML in which every
 * token whose answer label differs from the classifier's background symbol is
 * wrapped in a colored {@code <span>}. Text between tokens and after the last
 * token is copied through HTML-escaped. A legend listing every entry of the
 * tag-to-color map is appended after the text.
 *
 * @param out        writer that receives the generated HTML
 * @param classifier classifier providing the labels, background symbol and token classification
 * @param input      raw text to classify and render
 */
public void outputHighlighting(PrintWriter out, CRFClassifier classifier, String input) {
  Set<String> labels = classifier.labels();
  String background = classifier.backgroundSymbol();
  List<List<CoreMap>> sentences = classifier.classify(input);
  Map<String, Color> tagToColorMap = NERGUI.makeTagToColorMap(labels, background);

  StringBuilder html = new StringBuilder();
  // Offset in input up to which text has already been emitted.
  int cursor = 0;
  for (List<CoreMap> sentence : sentences) {
    for (CoreMap token : sentence) {
      int start = token.get(CoreAnnotations.CharacterOffsetBeginAnnotation.class);
      int end = token.get(CoreAnnotations.CharacterOffsetEndAnnotation.class);
      String tag = token.get(CoreAnnotations.AnswerAnnotation.class);
      boolean highlighted = !background.equals(tag);

      // Copy through whatever lies between the previous token and this one.
      if (cursor < start) {
        html.append(StringEscapeUtils.escapeHtml4(input.substring(cursor, start)));
      }

      // Open the color bar for tagged tokens.
      if (highlighted) {
        String openSpan = "<span style=\"color:#ffffff;background:"
            + NERGUI.colorToHTML(tagToColorMap.get(tag)) + "\">";
        html.append(openSpan);
      }
      html.append(StringEscapeUtils.escapeHtml4(input.substring(start, end)));
      // Close the color bar again.
      if (highlighted) {
        html.append("</span>");
      }
      cursor = end;
    }
  }

  // Trailing text after the final token.
  if (cursor < input.length()) {
    html.append(StringEscapeUtils.escapeHtml4(input.substring(cursor)));
  }

  // Legend: one colored line per known tag.
  html.append("<br><br>");
  html.append("Potential tags:");
  for (Map.Entry<String, Color> legendEntry : tagToColorMap.entrySet()) {
    String openSpan = "<span style=\"color:#ffffff;background:"
        + NERGUI.colorToHTML(legendEntry.getValue()) + "\">";
    html.append("<br> ");
    html.append(openSpan);
    html.append(StringEscapeUtils.escapeHtml4(legendEntry.getKey()));
    html.append("</span>");
  }

  out.print(html.toString());
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class MachineReading method assignSyntacticHeadToEntities.
/**
 * Walks every sentence of {@code corpus} and asks the reader to assign a
 * syntactic head to each entity mention found on that sentence.
 * <p>
 * When {@code MachineReadingProperties.forceGenerationOfIndexSpans} is set,
 * {@code indexSpans(0)} is invoked on the sentence's tree before the heads
 * are assigned. Sentences without an entity-mention annotation are skipped.
 *
 * @param corpus annotation expected to carry a sentences annotation; each
 *               sentence is expected to carry tokens and a tree (checked
 *               with assertions)
 */
private void assignSyntacticHeadToEntities(Annotation corpus) {
  assert (corpus != null);
  assert (corpus.get(SentencesAnnotation.class) != null);
  for (CoreMap sent : corpus.get(SentencesAnnotation.class)) {
    List<CoreLabel> tokens = sent.get(TokensAnnotation.class);
    assert (tokens != null);
    Tree tree = sent.get(TreeAnnotation.class);
    // Validate the tree BEFORE it is dereferenced below; previously this
    // assertion came after tree.indexSpans(0) and could never fire first.
    assert (tree != null);
    if (MachineReadingProperties.forceGenerationOfIndexSpans) {
      tree.indexSpans(0);
    }
    if (sent.get(EntityMentionsAnnotation.class) != null) {
      for (EntityMention e : sent.get(EntityMentionsAnnotation.class)) {
        reader.assignSyntacticHead(e, tree, tokens, true);
      }
    }
  }
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class AceReader method read.
/**
 * Reads ACE {@code *.apf.xml} files and collects their sentences into a single
 * corpus {@link Annotation}.
 * Note that you probably should call parse() instead.
 * <p>
 * Document boundaries are currently ignored: the sentences of all documents
 * read end up in the same corpus. When {@code path} is a directory it is
 * searched recursively for files ending in {@code .apf.xml}; files ending in
 * {@code .UPC1.apf.xml} are skipped.
 *
 * @param path directory containing ACE files to read (e.g.
 *             "/home/mcclosky/scr/data/ACE2005/english_test"), or the path
 *             to a single file
 * @return the corpus annotation to which all sentences read were added
 */
@Override
public Annotation read(String path) throws IOException, SAXException, ParserConfigurationException {
  File root = new File(path);
  assert root.exists();

  Annotation corpus = new Annotation("");
  List<CoreMap> sentences = new ArrayList<>();

  if (!root.isDirectory()) {
    // A single file was given.
    sentences.addAll(readDocument(root, corpus));
  } else {
    for (File apfFile : IOUtils.iterFilesRecursive(root, ".apf.xml")) {
      if (!apfFile.getName().endsWith(".UPC1.apf.xml")) {
        sentences.addAll(readDocument(apfFile, corpus));
      }
    }
  }
  AnnotationUtils.addSentences(corpus, sentences);

  // Quick stats gathered while reading.
  if (VERBOSE) {
    printCounter(entityCounts, "entity mention");
    printCounter(relationCounts, "relation mention");
    printCounter(eventCounts, "event mention");
  }

  for (CoreMap sentence : sentences) {
    // Adjacent entity mentions of the same type.
    countAdjacentMentions(sentence);
    // Relations between two proper nouns.
    countNameRelations(sentence);
    // Types of mentions.
    countMentionTypes(sentence);
  }

  // Stats produced by the per-sentence counting above.
  if (VERBOSE) {
    printCounter(adjacentEntityMentions, "adjacent entity mention");
    printCounter(nameRelationCounts, "name relation mention");
    printCounter(mentionTypeCounts, "mention type counts");
  }
  return corpus;
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class CoreMapSequenceMatcher method getMergedList.
/**
 * Builds a copy of the element list in which each requested group that
 * covers at least one element is replaced by a single merged chunk.
 * <p>
 * Groups are processed in ascending order of group id. A group is skipped
 * when its start lies before the end of the last merged group, or when it
 * covers no elements. Elements not covered by any merged group are copied
 * through unchanged, in their original order.
 *
 * @param groups ids of the groups to merge
 * @return new list containing the unmerged elements and the merged chunks
 */
public List<CoreMap> getMergedList(int... groups) {
  List<CoreMap> res = new ArrayList<>();
  // Index of the first element that has not yet been copied into res.
  int last = 0;
  List<Integer> orderedGroups = CollectionUtils.asList(groups);
  Collections.sort(orderedGroups);
  for (int group : orderedGroups) {
    int groupStart = start(group);
    if (groupStart >= last) {
      int groupEnd = end(group);
      if (groupEnd - groupStart >= 1) {
        // Copy the gap only when a chunk is actually emitted, so that "last"
        // always advances past everything appended. Copying the gap for an
        // empty group without advancing "last" would append those elements
        // a second time on the next group or in the tail copy below.
        res.addAll(elements.subList(last, groupStart));
        CoreMap merged = createMergedChunk(groupStart, groupEnd);
        res.add(merged);
        last = groupEnd;
      }
    }
  }
  // Everything after the last merged group.
  res.addAll(elements.subList(last, elements.size()));
  return res;
}
use of edu.stanford.nlp.util.CoreMap in project CoreNLP by stanfordnlp.
the class OpenIEServlet method annotate.
/**
 * Annotates a document (which is usually just a sentence) with the given pipeline.
 * <p>
 * A document without a sentences annotation is passed to the pipeline as is.
 * A document with exactly one sentence first has its natural-logic and
 * dependency annotations removed and is then passed to the pipeline. A
 * document with any other number of sentences is left untouched.
 *
 * @param pipeline pipeline used to annotate the document
 * @param ann      document to annotate
 */
public void annotate(StanfordCoreNLP pipeline, Annotation ann) {
  // Nothing has been split into sentences yet: annotate directly.
  if (ann.get(CoreAnnotations.SentencesAnnotation.class) == null) {
    pipeline.annotate(ann);
    return;
  }

  // Only single-sentence documents are re-annotated.
  if (ann.get(CoreAnnotations.SentencesAnnotation.class).size() != 1) {
    return;
  }

  CoreMap onlySentence = ann.get(CoreAnnotations.SentencesAnnotation.class).get(0);

  // Remove token-level natural-logic annotations.
  for (CoreLabel tok : onlySentence.get(CoreAnnotations.TokensAnnotation.class)) {
    tok.remove(NaturalLogicAnnotations.OperatorAnnotation.class);
    tok.remove(NaturalLogicAnnotations.PolarityAnnotation.class);
  }

  // Remove sentence-level natural-logic and dependency annotations.
  onlySentence.remove(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
  onlySentence.remove(NaturalLogicAnnotations.EntailedSentencesAnnotation.class);
  onlySentence.remove(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class);
  onlySentence.remove(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class);
  onlySentence.remove(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);

  pipeline.annotate(ann);
}
Aggregations