Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class TextAnnotationLabelCounter, method populateLabelCounts.
/**
 * Tallies how often each target label occurs, both per document and overall.
 *
 * <p>For every document a fresh counter is registered in {@code labelCounts} under the document
 * id. Each relation name and each constituent label found in the document's views is then
 * counted in that per-document counter and in {@code labelTotals}, provided that either
 * {@code useAllLabels} is set or the label is contained in {@code labelsToConsider}.
 *
 * @param annotationViews map from doc id to the set of views containing the annotations
 *        (constituents, relations) that will be split.
 */
@Override
public void populateLabelCounts(Map<String, Set<View>> annotationViews) {
    for (Map.Entry<String, Set<View>> entry : annotationViews.entrySet()) {
        Counter<String> perDocument = new Counter<>();
        labelCounts.put(entry.getKey(), perDocument);

        for (View view : entry.getValue()) {
            // Relations are counted by relation name.
            for (Relation relation : view.getRelations()) {
                String relationName = relation.getRelationName();
                if (!useAllLabels && !labelsToConsider.contains(relationName)) {
                    continue;
                }
                perDocument.incrementCount(relationName);
                labelTotals.incrementCount(relationName);
            }

            // Constituents are counted by their label.
            for (Constituent constituent : view.getConstituents()) {
                String constituentLabel = constituent.getLabel();
                if (!useAllLabels && !labelsToConsider.contains(constituentLabel)) {
                    continue;
                }
                perDocument.incrementCount(constituentLabel);
                labelTotals.incrementCount(constituentLabel);
            }
        }
    }
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class CoNLL2002Writer, method produceCoNLL2002Annotations.
/**
 * Renders the text annotation as one token per line, each followed by a space and a tag
 * ({@code B-}, {@code I-} or {@code U-} plus the entity label, or {@code O}), with an empty
 * line emitted whenever a token reaches the end of the current sentence.
 *
 * @param view the NER label view.
 * @param ta the text annotation.
 * @return the original text marked up with the annotations.
 */
private static String produceCoNLL2002Annotations(View view, TextAnnotation ta) {
    StringBuilder out = new StringBuilder();

    // Tokens, sentences and entities, each ordered by start/end offset.
    List<Constituent> tokenList = new ArrayList<>(ta.getView(ViewNames.TOKENS).getConstituents());
    Collections.sort(tokenList, TextAnnotationUtilities.constituentStartEndComparator);

    List<Constituent> sentenceList = new ArrayList<>(ta.getView(ViewNames.SENTENCE).getConstituents());
    Collections.sort(sentenceList, TextAnnotationUtilities.constituentStartEndComparator);

    List<Constituent> entityList = new ArrayList<>(view.getConstituents());
    Collections.sort(entityList, TextAnnotationUtilities.constituentStartEndComparator);

    int entityCursor = 0;
    int sentenceCursor = 0;
    int sentenceEnd = sentenceList.get(sentenceCursor).getEndCharOffset();

    for (Constituent token : tokenList) {
        final int tokStart = token.getStartCharOffset();
        final int tokEnd = token.getEndCharOffset();

        // Advance past entities this token has already moved beyond: stop at the first entity
        // that starts at/after the token, or that still extends to/past the token's end.
        while (entityCursor < entityList.size()) {
            Constituent candidate = entityList.get(entityCursor);
            if (tokStart <= candidate.getStartCharOffset()
                    || tokEnd <= candidate.getEndCharOffset()) {
                break;
            }
            entityCursor++;
        }

        out.append(token.getSurfaceForm()).append(' ');

        // Default is "outside"; overwritten below when the token lines up with the entity.
        String tag = "O";
        if (entityCursor < entityList.size()) {
            Constituent entity = entityList.get(entityCursor);
            final int entStart = entity.getStartCharOffset();
            final int entEnd = entity.getEndCharOffset();

            if (tokStart == entStart) {
                if (tokEnd < entEnd) {
                    // Entity continues after this token: this is its first token.
                    tag = "B-" + entity.getLabel();
                } else {
                    // Token covers the entity entirely: single-token entity.
                    tag = "U-" + entity.getLabel();
                    if (tokEnd > entEnd) {
                        System.err.println("Odd. There is an entity enclosed within a single token!");
                    }
                }
            } else if (tokStart > entStart && tokEnd <= entEnd) {
                // Token starts after the entity start and ends within it.
                tag = "I-" + entity.getLabel();
            }
        }
        out.append(tag).append('\n');

        // A blank line separates sentences; the sentence cursor never moves past the last one.
        if (tokEnd >= sentenceEnd) {
            out.append('\n');
            if (sentenceCursor < sentenceList.size() - 1) {
                sentenceCursor++;
            }
            sentenceEnd = sentenceList.get(sentenceCursor).getEndCharOffset();
        }
    }
    return out.toString();
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class NombankFields, method createPredicate.
/**
 * Builds the "Predicate" constituent for this entry.
 *
 * <p>The start position is read from the first component of the span stored on the
 * {@code predicateTerminal}-th element of {@code yield}; the constituent covers
 * {@code [start, start + 1)} and carries the lemma and sense as attributes.
 *
 * @param ta the text annotation the constituent belongs to.
 * @param viewName name of the view the constituent is created for.
 * @param yield the terminals, each labelled with a (string, span) pair.
 * @return the newly created predicate constituent.
 */
@Override
public Constituent createPredicate(TextAnnotation ta, String viewName, List<Tree<Pair<String, IntPair>>> yield) {
    int predicateStart = yield.get(predicateTerminal).getLabel().getSecond().getFirst();
    int predicateEnd = predicateStart + 1;

    Constituent result = new Constituent("Predicate", viewName, ta, predicateStart, predicateEnd);
    result.addAttribute(PropbankReader.LemmaIdentifier, lemma);
    result.addAttribute(PropbankReader.SenseIdentifier, sense);
    return result;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class PropbankFields, method createPredicate.
/**
 * Builds the "Predicate" constituent for this entry and attaches its annotations.
 *
 * <p>The constituent covers {@code [start, start + 1)}, where {@code start} is the first
 * component of the span stored on the {@code predicateTerminal}-th element of {@code yield}.
 * Lemma and sense are attached first, then the form, tense, aspect, person and voice decoded
 * from characters 0 through 4 of {@code inflection}, and finally the tagger.
 *
 * @param ta the text annotation the constituent belongs to.
 * @param viewName name of the view the constituent is created for.
 * @param yield the terminals, each labelled with a (string, span) pair.
 * @return the newly created predicate constituent.
 */
public Constituent createPredicate(TextAnnotation ta, String viewName, List<Tree<Pair<String, IntPair>>> yield) {
    int predicateStart = yield.get(predicateTerminal).getLabel().getSecond().getFirst();
    Constituent result = new Constituent("Predicate", viewName, ta, predicateStart, predicateStart + 1);

    result.addAttribute(PropbankReader.LemmaIdentifier, lemma);
    result.addAttribute(PropbankReader.SenseIdentifier, sense);

    // One inflection character per attribute, decoded and attached in positional order.
    String form = PropbankReader.Forms.getForm(inflection.charAt(0)).name();
    result.addAttribute(PropbankReader.FormIdentifier, form);

    String tense = PropbankReader.Tenses.getTense(inflection.charAt(1)).name();
    result.addAttribute(PropbankReader.TenseIdentifier, tense);

    String aspect = PropbankReader.Aspects.getAspect(inflection.charAt(2)).name();
    result.addAttribute(PropbankReader.AspectIdentifier, aspect);

    String person = PropbankReader.Person.getPerson(inflection.charAt(3)).name();
    result.addAttribute(PropbankReader.PersonIdentifier, person);

    String voice = PropbankReader.Voices.getVoice(inflection.charAt(4)).name();
    result.addAttribute(PropbankReader.VoiceIdentifier, voice);

    result.addAttribute(PropbankReader.Tagger, tagger);
    return result;
}
Use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.
Class GoldLabel, method addArguments.
/**
 * Appends the constituents of one gold label to the parallel output lists {@code args},
 * {@code labels} and {@code scores} (every score is {@code 1.0}).
 *
 * <p>If the converted label of {@code arg} is not {@code "rel"}, every constituent in
 * {@code aa} is added unchanged, labelled with the converted form of its own label.
 *
 * <p>If it is {@code "rel"}, each constituent is compared with the predicate span:
 * <ul>
 * <li>identical span: skipped;</li>
 * <li>span that contains the predicate: the non-empty parts before and after the predicate
 * are each added as a new {@code "C-V"} constituent, so the predicate itself is never
 * covered;</li>
 * <li>anything else: a new {@code "C-V"} constituent over the same span is added.</li>
 * </ul>
 *
 * <p>The containment test requires {@code c.end >= predicate.end}. The previous test used
 * {@code <=}, under which the "after the predicate" piece {@code [predicate.end, c.end)} was
 * always either empty or inverted (start greater than end), while constituents strictly
 * containing the predicate were added whole.
 *
 * @param ta the text annotation new constituents are created on.
 * @param predicate the predicate constituent the arguments belong to.
 * @param args output list receiving the argument constituents.
 * @param labels output list receiving one label per added constituent.
 * @param scores output list receiving one score per added constituent.
 * @param arg the gold label whose (converted) label selects the handling.
 * @param aa the constituents associated with {@code arg}.
 */
private void addArguments(TextAnnotation ta, Constituent predicate, List<Constituent> args, List<String> labels, List<Double> scores, GoldLabel arg, List<Constituent> aa) {
    String label = convertToCoNLL(arg.label);

    if (!label.equals("rel")) {
        for (Constituent c : aa) {
            args.add(c);
            labels.add(convertToCoNLL(c.getLabel()));
            scores.add(1.0);
        }
        return;
    }

    final int predStart = predicate.getStartSpan();
    final int predEnd = predicate.getEndSpan();

    for (Constituent c : aa) {
        final int start = c.getStartSpan();
        final int end = c.getEndSpan();

        if (start == predStart && end == predEnd) {
            // The predicate itself: nothing to add.
            continue;
        }

        if (start <= predStart && end >= predEnd) {
            // c contains the predicate: keep only the pieces on either side of it.
            if (start != predStart) {
                addCVArgument(ta, args, labels, scores, start, predStart);
            }
            if (predEnd != end) {
                addCVArgument(ta, args, labels, scores, predEnd, end);
            }
        } else {
            // c does not contain the predicate: keep its span as is.
            addCVArgument(ta, args, labels, scores, start, end);
        }
    }
}

/** Adds a new "C-V" constituent over {@code [start, end)} with score 1.0 to the output lists. */
private void addCVArgument(TextAnnotation ta, List<Constituent> args, List<String> labels, List<Double> scores, int start, int end) {
    args.add(new Constituent("C-V", srlViewName, ta, start, end));
    labels.add("C-V");
    scores.add(1.0);
}
Aggregations