Search in sources :

Example 31 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class TextAnnotationLabelCounter method populateLabelCounts.

/**
 * Tallies how often each relation name and constituent label occurs in the supplied views,
 * both per document and overall.
 *
 * <p>For every document id a new {@code Counter} is stored in {@code labelCounts}; each counted
 * label is added to that per-document counter and to {@code labelTotals}. A label is counted
 * only if {@code useAllLabels} is set or {@code labelsToConsider} contains it.
 *
 * @param annotationViews map from doc id to set of views containing the annotations (constituents, relations)
 *                        that will be split.
 */
@Override
public void populateLabelCounts(Map<String, Set<View>> annotationViews) {
    for (Map.Entry<String, Set<View>> docEntry : annotationViews.entrySet()) {
        Counter<String> perDocCounts = new Counter<>();
        // Register the counter up front so a document with no matching labels still has an entry.
        labelCounts.put(docEntry.getKey(), perDocCounts);
        for (View view : docEntry.getValue()) {
            // Relations are counted under their relation name.
            for (Relation relation : view.getRelations()) {
                String relationName = relation.getRelationName();
                if (!useAllLabels && !labelsToConsider.contains(relationName)) {
                    continue;
                }
                perDocCounts.incrementCount(relationName);
                labelTotals.incrementCount(relationName);
            }
            // Constituents are counted under their label.
            for (Constituent constituent : view.getConstituents()) {
                String constituentLabel = constituent.getLabel();
                if (!useAllLabels && !labelsToConsider.contains(constituentLabel)) {
                    continue;
                }
                perDocCounts.incrementCount(constituentLabel);
                labelTotals.incrementCount(constituentLabel);
            }
        }
    }
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) Counter(edu.illinois.cs.cogcomp.core.stats.Counter) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 32 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class CoNLL2002Writer method produceCoNLL2002Annotations.

/**
 * Render the tokens of a text annotation as column-formatted text with embedded entity tags.
 *
 * <p>Each token produces one line: its surface form, a space, and a tag. The tag is
 * {@code U-<label>} when the token starts where the current entity starts and ends at or after
 * the entity's end, {@code B-<label>} when it starts the entity but ends before it,
 * {@code I-<label>} when it starts after the entity's start and ends within it, and {@code O}
 * otherwise. An extra blank line is written after a token whose end offset reaches the end
 * offset of the current sentence.
 *
 * <p>If the sentence view has no constituents, no sentence-separating blank lines are written
 * (previously this case threw an {@code IndexOutOfBoundsException}).
 *
 * @param view the NER label view.
 * @param ta the text annotation.
 * @return the original text marked up with the annotations.
 */
private static String produceCoNLL2002Annotations(View view, TextAnnotation ta) {
    StringBuilder sb = new StringBuilder();
    // get the tokens.
    List<Constituent> tokens = new ArrayList<>(ta.getView(ViewNames.TOKENS).getConstituents());
    Collections.sort(tokens, TextAnnotationUtilities.constituentStartEndComparator);
    // get the sentences.
    List<Constituent> sentences = new ArrayList<>(ta.getView(ViewNames.SENTENCE).getConstituents());
    Collections.sort(sentences, TextAnnotationUtilities.constituentStartEndComparator);
    // get the entities
    List<Constituent> entities = new ArrayList<>(view.getConstituents());
    Collections.sort(entities, TextAnnotationUtilities.constituentStartEndComparator);
    int entityindx = 0;
    int sentenceindex = 0;
    // Guard the empty case: indexing an empty sentence list would throw before any token is
    // rendered. Without sentences there is simply no boundary to track.
    final boolean haveSentences = !sentences.isEmpty();
    int sentenceEndIndex = haveSentences ? sentences.get(sentenceindex).getEndCharOffset() : Integer.MAX_VALUE;
    for (Constituent token : tokens) {
        // make sure we have the next entity: skip every entity that both starts before this
        // token and ends before this token's end.
        for (; entityindx < entities.size(); entityindx++) {
            Constituent entity = entities.get(entityindx);
            if (token.getStartCharOffset() <= entity.getStartCharOffset())
                break;
            else if (token.getEndCharOffset() <= entity.getEndCharOffset())
                // we are inside of the entity.
                break;
        }
        sb.append(token.getSurfaceForm()).append(' ');
        if (entityindx < entities.size()) {
            Constituent entity = entities.get(entityindx);
            if (token.getStartCharOffset() == entity.getStartCharOffset()) {
                if (token.getEndCharOffset() == entity.getEndCharOffset()) {
                    // token and entity cover exactly the same characters.
                    sb.append("U-").append(entity.getLabel());
                } else if (token.getEndCharOffset() > entity.getEndCharOffset()) {
                    // the entity is shorter than the token; still tag the token as a unit.
                    sb.append("U-").append(entity.getLabel());
                    System.err.println("Odd. There is an entity enclosed within a single token!");
                } else {
                    // first token of a longer entity.
                    sb.append("B-").append(entity.getLabel());
                }
            } else if (token.getStartCharOffset() > entity.getStartCharOffset()) {
                if (token.getEndCharOffset() <= entity.getEndCharOffset()) {
                    // later token of the current entity.
                    sb.append("I-").append(entity.getLabel());
                } else {
                    // Defensive only: the scan above stops on an entity that starts before the
                    // token only when the token ends within it, so this is not expected to run.
                    sb.append('O');
                }
            } else {
                // the next entity starts after this token.
                sb.append('O');
            }
        } else {
            // no entities left.
            sb.append('O');
        }
        sb.append('\n');
        if (haveSentences && token.getEndCharOffset() >= sentenceEndIndex) {
            // blank line between sentences; stay on the last sentence once it is reached.
            sb.append('\n');
            if (sentenceindex < (sentences.size() - 1))
                sentenceindex++;
            sentenceEndIndex = sentences.get(sentenceindex).getEndCharOffset();
        }
    }
    return sb.toString();
}
Also used : ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 33 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class NombankFields method createPredicate.

/**
 * Builds the predicate constituent for this entry.
 *
 * <p>The terminal at index {@code predicateTerminal} of {@code yield} supplies the start
 * position (the first component of the terminal's {@code IntPair}); the constituent is created
 * with label {@code "Predicate"} and the positions {@code start} and {@code start + 1}
 * (presumably a one-token span; confirm against the {@code Constituent} constructor). The
 * lemma and sense of this entry are attached as attributes.
 *
 * @param ta the text annotation the constituent belongs to
 * @param viewName name of the view the constituent is created for
 * @param yield terminals of the tree, each labelled with a string and an {@code IntPair}
 * @return the newly created predicate constituent
 */
@Override
public Constituent createPredicate(TextAnnotation ta, String viewName, List<Tree<Pair<String, IntPair>>> yield) {
    final IntPair terminalSpan = yield.get(predicateTerminal).getLabel().getSecond();
    final int firstToken = terminalSpan.getFirst();
    final Constituent result = new Constituent("Predicate", viewName, ta, firstToken, firstToken + 1);
    result.addAttribute(PropbankReader.LemmaIdentifier, lemma);
    result.addAttribute(PropbankReader.SenseIdentifier, sense);
    return result;
}
Also used : Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 34 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class PropbankFields method createPredicate.

/**
 * Builds the predicate constituent for this entry and attaches its PropBank attributes.
 *
 * <p>The terminal at index {@code predicateTerminal} of {@code yield} supplies the start
 * position; the constituent is created with label {@code "Predicate"} and the positions
 * {@code start} and {@code start + 1}. Lemma, sense and tagger are attached as attributes, and
 * characters 0 to 4 of {@code inflection} are decoded into form, tense, aspect, person and
 * voice respectively.
 *
 * <p>NOTE(review): {@code inflection} is read at indices 0 through 4 without a length check,
 * so it is assumed to hold at least five characters.
 *
 * @param ta the text annotation the constituent belongs to
 * @param viewName name of the view the constituent is created for
 * @param yield terminals of the tree, each labelled with a string and an {@code IntPair}
 * @return the newly created predicate constituent
 */
public Constituent createPredicate(TextAnnotation ta, String viewName, List<Tree<Pair<String, IntPair>>> yield) {
    final IntPair terminalSpan = yield.get(predicateTerminal).getLabel().getSecond();
    final int firstToken = terminalSpan.getFirst();
    final Constituent result = new Constituent("Predicate", viewName, ta, firstToken, firstToken + 1);
    result.addAttribute(PropbankReader.LemmaIdentifier, lemma);
    result.addAttribute(PropbankReader.SenseIdentifier, sense);

    // Each inflection character is decoded and stored before the next one is read.
    final String form = PropbankReader.Forms.getForm(inflection.charAt(0)).name();
    result.addAttribute(PropbankReader.FormIdentifier, form);
    final String tense = PropbankReader.Tenses.getTense(inflection.charAt(1)).name();
    result.addAttribute(PropbankReader.TenseIdentifier, tense);
    final String aspect = PropbankReader.Aspects.getAspect(inflection.charAt(2)).name();
    result.addAttribute(PropbankReader.AspectIdentifier, aspect);
    final String person = PropbankReader.Person.getPerson(inflection.charAt(3)).name();
    result.addAttribute(PropbankReader.PersonIdentifier, person);
    final String voice = PropbankReader.Voices.getVoice(inflection.charAt(4)).name();
    result.addAttribute(PropbankReader.VoiceIdentifier, voice);

    result.addAttribute(PropbankReader.Tagger, tagger);
    return result;
}
Also used : Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 35 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class GoldLabel method addArguments.

/**
 * Appends the constituents of one gold argument to the parallel {@code args}, {@code labels}
 * and {@code scores} lists (one entry is added to each list per argument, with score 1.0).
 *
 * <p>When the argument's converted label is not {@code "rel"}, every constituent in {@code aa}
 * is added unchanged, labelled with its own converted label.
 *
 * <p>When it is {@code "rel"}, the constituents are turned into {@code "C-V"} arguments that
 * exclude the predicate itself:
 * <ul>
 *   <li>a constituent with exactly the predicate's span is skipped;</li>
 *   <li>a constituent that covers the predicate is replaced by its non-empty parts before
 *       and after the predicate;</li>
 *   <li>any other constituent is added with its own span.</li>
 * </ul>
 *
 * <p>The covering test previously compared the end spans with {@code <=}, which let
 * constituents ending before the predicate's end produce a span whose start lay after its end,
 * and left constituents strictly containing the predicate unsplit.
 *
 * @param ta the text annotation the new constituents belong to
 * @param predicate the predicate the argument is attached to
 * @param args output list receiving the argument constituents
 * @param labels output list receiving one label per added argument
 * @param scores output list receiving one score per added argument
 * @param arg the gold label describing the argument
 * @param aa the constituents that make up the argument
 */
private void addArguments(TextAnnotation ta, Constituent predicate, List<Constituent> args, List<String> labels, List<Double> scores, GoldLabel arg, List<Constituent> aa) {
    String label = convertToCoNLL(arg.label);
    if (!label.equals("rel")) {
        for (Constituent c : aa) {
            args.add(c);
            labels.add(convertToCoNLL(c.getLabel()));
            scores.add(1.0);
        }
        return;
    }
    for (Constituent c : aa) {
        int predStart = predicate.getStartSpan();
        int predEnd = predicate.getEndSpan();
        int start = c.getStartSpan();
        int end = c.getEndSpan();
        if (start == predStart && end == predEnd) {
            // This is the predicate itself; it is not an argument.
            continue;
        }
        if (start <= predStart && end >= predEnd) {
            // c covers the predicate: keep only what lies outside it, skipping empty parts.
            if (start != predStart) {
                addContinuationArgument(ta, args, labels, scores, start, predStart);
            }
            if (predEnd != end) {
                addContinuationArgument(ta, args, labels, scores, predEnd, end);
            }
        } else {
            // c does not cover the predicate: keep its span as it is.
            addContinuationArgument(ta, args, labels, scores, start, end);
        }
    }
}

/** Adds one "C-V" constituent over the given span, with its label and a score of 1.0. */
private void addContinuationArgument(TextAnnotation ta, List<Constituent> args, List<String> labels, List<Double> scores, int start, int end) {
    args.add(new Constituent("C-V", srlViewName, ta, start, end));
    labels.add("C-V");
    scores.add(1.0);
}
Also used : Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 227, TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 121, View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 66, Feature (edu.illinois.cs.cogcomp.edison.features.Feature): 44, Test (org.junit.Test): 43, ArrayList (java.util.ArrayList): 37, Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation): 28, EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException): 25, SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView): 22, LinkedHashSet (java.util.LinkedHashSet): 22, TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView): 21, DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature): 20, AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException): 18, IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair): 18, FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor): 17, ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath): 16, FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest): 16, FileInputStream (java.io.FileInputStream): 16, PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView): 14, HashSet (java.util.HashSet): 13