Search in sources :

Example 1 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class PrepSRLAnnotator method addView.

/**
 * Builds the preposition-role view for {@code ta} and attaches it under {@code viewName}.
 *
 * <p>Phase one walks the TOKENS view and gathers candidate constituents: the token itself when
 * {@code PrepSRLDataReader.isPrep} accepts it, plus whatever non-null constituents
 * {@code isBigramPrep} and {@code isTrigramPrep} return for the same start token. Phase two
 * runs the classifier over every candidate and records a span label for each prediction that
 * differs from {@code DataReader.CANDIDATE}.
 *
 * @param ta the text annotation to read tokens from and to add the new view to
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    final List<Constituent> tokens = ta.getView(ViewNames.TOKENS).getConstituents();
    final List<Constituent> prepCandidates = new ArrayList<>();

    for (Constituent token : tokens) {
        final int start = token.getStartSpan();

        // Single-token preposition: re-home the token into this annotator's view.
        if (PrepSRLDataReader.isPrep(ta, start)) {
            prepCandidates.add(token.cloneForNewViewWithDestinationLabel(viewName, DataReader.CANDIDATE));
        }

        // Multi-word prepositions starting at the same token (two words, then three).
        final Constituent bigram = PrepSRLDataReader.isBigramPrep(ta, start, viewName);
        if (bigram != null) {
            prepCandidates.add(bigram);
        }
        final Constituent trigram = PrepSRLDataReader.isTrigramPrep(ta, start, viewName);
        if (trigram != null) {
            prepCandidates.add(trigram);
        }
    }

    final SpanLabelView roleView = new SpanLabelView(viewName, viewName + "-annotator", ta, 1.0, true);
    for (Constituent candidate : prepCandidates) {
        final String predictedRole = classifier.discreteValue(candidate);
        // A prediction equal to the placeholder candidate label produces no span.
        if (predictedRole.equals(DataReader.CANDIDATE)) {
            continue;
        }
        roleView.addSpanLabel(candidate.getStartSpan(), candidate.getEndSpan(), predictedRole, 1.0);
    }
    ta.addView(viewName, roleView);
}
Also used : ArrayList(java.util.ArrayList) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 2 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class CorefMUCEvaluator method evaluate.

/**
 * Computes MUC-style link counts for a gold and a predicted coreference view and records them
 * on {@code tester} under the key {@code "coref"}.
 *
 * <p>Both views are cast to {@code CoreferenceView} and stored in {@code this.gold} /
 * {@code this.prediction}. The recall-side counts are obtained with gold as the reference
 * clustering, the precision-side counts with the prediction as the reference clustering.
 *
 * @param tester receives {@code recordCount("coref", denominator1, denominator2, numerator1)}
 * @param goldView the gold coreference view; must be a {@code CoreferenceView}
 * @param predictionView the predicted coreference view; must be a {@code CoreferenceView}
 */
public void evaluate(ClassificationTester tester, View goldView, View predictionView) {
    this.gold = (CoreferenceView) goldView;
    this.prediction = (CoreferenceView) predictionView;
    // Recall = \sum_i [ |si| - |pOfsi| ] / \sum_i [ |si| - 1 ]
    // where si is a true cluster, pOfsi is the set of predicted clusters that contain
    // elements of si (i.e. number of predicted clusters having some overlap with this
    // gold cluster)
    int[] recallCounts = countMucLinks(gold, prediction);
    int numerator1 = recallCounts[0];
    int denominator1 = recallCounts[1];
    // Precision is defined dually by reversing the roles of gold and prediction
    // Precision = \sum_i [ |siprime| - |pOfsiprime| ] / \sum_i [ |siprime| - 1 ]
    // where siprime is a predicted cluster, pOfsiprime is the set of
    // true clusters that contain elements of siprime.
    int[] precisionCounts = countMucLinks(prediction, gold);
    int numerator2 = precisionCounts[0];
    int denominator2 = precisionCounts[1];
    assert (numerator1 == numerator2);
    tester.recordCount("coref", denominator1, denominator2, numerator1);
}

/**
 * Sums, over every cluster of {@code reference}, the cluster size minus the number of
 * {@code other} clusters that overlap it (numerator) and the cluster size minus one
 * (denominator).
 *
 * @return a two-element array {@code {numerator, denominator}}
 */
private int[] countMucLinks(CoreferenceView reference, CoreferenceView other) {
    int numerator = 0;
    int denominator = 0;
    for (Constituent referenceCanonical : reference.getCanonicalEntitiesViaRelations()) {
        Set<Constituent> referenceCluster = new HashSet<>(reference.getCoreferentMentionsViaRelations(referenceCanonical));
        for (Constituent otherCanonical : other.getCanonicalEntitiesViaRelations()) {
            Set<Constituent> otherCluster = new HashSet<>(other.getCoreferentMentionsViaRelations(otherCanonical));
            if (clustersOverlap(referenceCluster, otherCluster))
                numerator -= 1;
        }
        numerator += referenceCluster.size();
        denominator += referenceCluster.size() - 1;
    }
    return new int[] { numerator, denominator };
}

/**
 * Returns true as soon as one mention of {@code otherCluster} matches a mention of
 * {@code referenceCluster} under {@code equalsWithoutAttributeEqualityCheck}; only the
 * existence of an overlap is needed, so the full intersection is never built.
 */
private boolean clustersOverlap(Set<Constituent> referenceCluster, Set<Constituent> otherCluster) {
    for (Constituent referenceMention : referenceCluster) {
        for (Constituent otherMention : otherCluster) {
            if (otherMention.equalsWithoutAttributeEqualityCheck(referenceMention))
                return true;
        }
    }
    return false;
}
Also used : Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) HashSet(java.util.HashSet)

Example 3 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class AnnotationFixer method rationalizeBoundaryAnnotations.

/**
 * Correct automated annotations (tokenization, sentence splitting) based on gold annotations of
 * entity mentions.
 *
 * <p>Every sentence whose start index falls strictly inside a mention of {@code viewName} is
 * merged with the sentence preceding it: the two constituents are removed from the SENTENCE
 * view and replaced by one constituent spanning both, carrying the attributes of both.
 *
 * @param ta TextAnnotation with annotated clean text
 * @param viewName name of the view whose constituents (mentions) are checked against sentence
 *        boundaries
 * @return a map of view names to indexes indicating where violations were found/corrected
 * @throws IllegalStateException if a sentence would have to take part in more than one merge
 */
public static Map<String, Set<Integer>> rationalizeBoundaryAnnotations(TextAnnotation ta, String viewName) {
    Map<String, Set<Integer>> violations = new HashMap<>();
    Set<Integer> badSentenceStartIndexes = new HashSet<>();
    violations.put(ViewNames.SENTENCE, badSentenceStartIndexes);
    View sentences = ta.getView(ViewNames.SENTENCE);
    TreeMap<Integer, Constituent> sentenceStarts = new TreeMap<>();
    for (Constituent s : sentences) sentenceStarts.put(s.getStartSpan(), s);
    Set<Pair<Constituent, Constituent>> sentencesToMerge = new HashSet<>();
    View nerMention = ta.getView(viewName);
    for (Constituent m : nerMention.getConstituents()) {
        int mentStart = m.getStartSpan();
        int mentEnd = m.getEndSpan();
        Constituent lastSent = null;
        for (int sentStart : sentenceStarts.keySet()) {
            // ordered sentence list, so stop after the first sentence starting past the mention
            if (sentStart > mentEnd)
                break;
            Constituent currentSent = sentenceStarts.get(sentStart);
            if (sentStart > mentStart && sentStart < mentEnd) {
                sentencesToMerge.add(new Pair<>(lastSent, currentSent));
                badSentenceStartIndexes.add(sentStart);
            }
            lastSent = currentSent;
        }
    }
    Set<Integer> sentStartsProcessed = new HashSet<>();
    // first-sentence start -> second-sentence start for every merge already performed
    Map<Integer, Integer> mergedBoundaries = new HashMap<>();
    for (Pair<Constituent, Constituent> sentPair : sentencesToMerge) {
        Constituent first = sentPair.getFirst();
        Constituent second = sentPair.getSecond();
        int firstStart = first.getStartSpan();
        int secondStart = second.getStartSpan();
        // The same boundary can be reported by several mentions; merging it once is enough.
        Integer previousPartner = mergedBoundaries.get(firstStart);
        if (previousPartner != null && previousPartner == secondStart)
            continue;
        if (sentStartsProcessed.contains(firstStart) || sentStartsProcessed.contains(secondStart)) {
            throw new IllegalStateException("more complex boundary constraints than I can currently handle -- " + "more than two consecutive sentences with boundary errors.");
        }
        Constituent combinedSent = null;
        if (null == first.getLabelsToScores())
            combinedSent = new Constituent(first.getLabel(), first.getConstituentScore(), ViewNames.SENTENCE, first.getTextAnnotation(), first.getStartSpan(), second.getEndSpan());
        else
            combinedSent = new Constituent(first.getLabelsToScores(), ViewNames.SENTENCE, first.getTextAnnotation(), first.getStartSpan(), second.getEndSpan());
        for (String k : first.getAttributeKeys()) {
            combinedSent.addAttribute(k, first.getAttribute(k));
        }
        // Values for the second sentence's keys must be read from the second sentence.
        for (String k : second.getAttributeKeys()) {
            combinedSent.addAttribute(k, second.getAttribute(k));
        }
        sentences.removeConstituent(first);
        sentences.removeConstituent(second);
        sentences.addConstituent(combinedSent);
        // Remember both sentences so a later pair that reuses either one is rejected above.
        sentStartsProcessed.add(firstStart);
        sentStartsProcessed.add(secondStart);
        mergedBoundaries.put(firstStart, secondStart);
    }
    ta.setSentences();
    return violations;
}
Also used : View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 4 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class SimpleGazetteerAnnotator method addView.

/**
 * Produces a span-label view of gazetteer matches over the TOKENS view and adds it to
 * {@code ta}. The resulting constituents may overlap, and a token may match entries from
 * several gazetteers.
 *
 * <p>For each token position, every case-sensitive dictionary and the ignore-case dictionary
 * at the same index are asked to {@code match} at that position, in that order.
 *
 * @param ta the text annotation to scan and to attach the new view to
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    final List<Constituent> tokens = ta.getView(ViewNames.TOKENS).getConstituents();
    final SpanLabelView gazetteerView =
            new SpanLabelView(this.getViewName(), this.getClass().getName(), ta, 1d, true);

    for (int tokenIndex = 0; tokenIndex < tokens.size(); tokenIndex++) {
        // dictionaries and dictionariesIgnoreCase are walked in lock-step by index.
        for (int dictIndex = 0; dictIndex < dictionaries.size(); dictIndex++) {
            dictionaries.get(dictIndex).match(tokens, tokenIndex, gazetteerView);
            dictionariesIgnoreCase.get(dictIndex).match(tokens, tokenIndex, gazetteerView);
        }
    }

    ta.addView(gazetteerView.getViewName(), gazetteerView);
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 5 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class CurrencyIndicator method getFeatures.

/**
 * Returns a set containing {@code CURRENCY} when the currency view has a qualifying constituent
 * inside {@code c}, and an empty set otherwise.
 *
 * <p>The currency list is loaded on first use, and the currency view is added to the
 * constituent's TextAnnotation if it is not already present.
 *
 * @param c the constituent to extract features for
 * @return an insertion-ordered set holding at most the single feature {@code CURRENCY}
 * @throws EdisonException if loading the currency list or adding the currency view fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    try {
        // NOTE(review): check-lock-check on `loaded`; confirm the field is volatile.
        if (!loaded)
            synchronized (this) {
                // now its changed to be loaded from datastore.
                if (!loaded)
                    loadCurrency(gzip, true);
            }
    } catch (Exception ex) {
        throw new EdisonException(ex);
    }
    TextAnnotation ta = c.getTextAnnotation();
    if (!ta.hasView(VIEW_NAME)) {
        try {
            addCurrencyView(ta);
        } catch (Exception e) {
            // Without the view the lookup below cannot succeed, so surface the real cause
            // instead of printing it and continuing.
            throw new EdisonException(e);
        }
    }
    SpanLabelView view = (SpanLabelView) ta.getView(VIEW_NAME);
    Set<Feature> features = new LinkedHashSet<>();
    for (Constituent cc : view.where(Queries.containedInConstituent(c))) {
        if (cc.getEndSpan() == c.getEndSpan()) {
            // NOTE(review): with cc ending where c ends, this condition looks unsatisfiable
            // unless a span can start after it ends; possibly a comparison against
            // c.getStartSpan() was intended. Left unchanged -- confirm intent.
            if (cc.getStartSpan() - 1 > c.getEndSpan()) {
                // check if this is a number
                if (WordLists.NUMBERS.contains(ta.getToken(cc.getStartSpan() - 1).toLowerCase())) {
                    features.add(CURRENCY);
                    break;
                }
            }
        } else if (WordFeatureExtractorFactory.numberNormalizer.getWordFeatures(ta, cc.getEndSpan()).size() > 0) {
            features.add(CURRENCY);
            break;
        }
    }
    return features;
}
Also used : EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) URISyntaxException(java.net.URISyntaxException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)227 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)121 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)66 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)44 Test (org.junit.Test)43 ArrayList (java.util.ArrayList)37 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)28 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)25 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)22 LinkedHashSet (java.util.LinkedHashSet)22 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)21 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)20 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)18 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)18 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)17 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)14 HashSet (java.util.HashSet)13