Search in sources :

Example 1 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class AnnotationFixer method rationalizeBoundaryAnnotations.

/**
 * Corrects automated sentence-split annotations so that they agree with the constituents of a
 * trusted view (e.g. gold entity mentions): whenever a sentence starts strictly inside a
 * constituent of that view, the offending sentence is merged with the sentence preceding it.
 *
 * @param ta TextAnnotation with annotated clean text; its SENTENCE view is modified in place
 * @param viewName name of the view whose constituents must not be split by a sentence boundary
 * @return a map of view names to indexes indicating where violations were found/corrected;
 *         the only key written here is {@code ViewNames.SENTENCE}, mapped to the start spans
 *         of the sentences that began inside a constituent
 * @throws IllegalStateException if a sentence takes part in more than one merge, i.e. more than
 *         two consecutive sentences are affected by boundary errors
 */
public static Map<String, Set<Integer>> rationalizeBoundaryAnnotations(TextAnnotation ta, String viewName) {
    Map<String, Set<Integer>> violations = new HashMap<>();
    Set<Integer> badSentenceStartIndexes = new HashSet<>();
    violations.put(ViewNames.SENTENCE, badSentenceStartIndexes);

    // index sentences by start span; the TreeMap yields them in document order
    View sentences = ta.getView(ViewNames.SENTENCE);
    TreeMap<Integer, Constituent> sentenceStarts = new TreeMap<>();
    for (Constituent s : sentences) {
        sentenceStarts.put(s.getStartSpan(), s);
    }

    // collect (previous sentence, offending sentence) pairs for every boundary inside a mention
    Set<Pair<Constituent, Constituent>> sentencesToMerge = new HashSet<>();
    View nerMention = ta.getView(viewName);
    for (Constituent m : nerMention.getConstituents()) {
        int mentStart = m.getStartSpan();
        int mentEnd = m.getEndSpan();
        Constituent lastSent = null;
        for (Map.Entry<Integer, Constituent> entry : sentenceStarts.entrySet()) {
            int sentStart = entry.getKey();
            // ordered sentence list, so stop once sentences start after the mention
            if (sentStart > mentEnd) {
                break;
            }
            Constituent currentSent = entry.getValue();
            if (sentStart > mentStart && sentStart < mentEnd) {
                sentencesToMerge.add(new Pair<>(lastSent, currentSent));
                badSentenceStartIndexes.add(sentStart);
            }
            lastSent = currentSent;
        }
    }

    Set<Integer> sentStartsProcessed = new HashSet<>();
    for (Pair<Constituent, Constituent> sentPair : sentencesToMerge) {
        Constituent first = sentPair.getFirst();
        Constituent second = sentPair.getSecond();
        int firstStart = first.getStartSpan();
        int secondStart = second.getStartSpan();
        if (sentStartsProcessed.contains(firstStart) || sentStartsProcessed.contains(secondStart)) {
            throw new IllegalStateException("more complex boundary constraints than I can currently handle -- " + "more than two consecutive sentences with boundary errors.");
        }

        // the merged sentence inherits label/score information from the first sentence
        Constituent combinedSent;
        if (null == first.getLabelsToScores()) {
            combinedSent = new Constituent(first.getLabel(), first.getConstituentScore(), ViewNames.SENTENCE, first.getTextAnnotation(), first.getStartSpan(), second.getEndSpan());
        } else {
            combinedSent = new Constituent(first.getLabelsToScores(), ViewNames.SENTENCE, first.getTextAnnotation(), first.getStartSpan(), second.getEndSpan());
        }
        for (String k : first.getAttributeKeys()) {
            combinedSent.addAttribute(k, first.getAttribute(k));
        }
        // copy the second sentence's own values (previously these were read from 'first')
        for (String k : second.getAttributeKeys()) {
            combinedSent.addAttribute(k, second.getAttribute(k));
        }

        sentences.removeConstituent(first);
        sentences.removeConstituent(second);
        sentences.addConstituent(combinedSent);

        // remember both sentences so a chained merge is detected by the guard above
        sentStartsProcessed.add(firstStart);
        sentStartsProcessed.add(secondStart);
    }
    ta.setSentences();
    return violations;
}
Also used : View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 2 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class BasicAnnotatorService method addView.

/**
 * Adds the requested view to the given TextAnnotation, first adding any views the providing
 * annotator declares as required. DOES NOT CACHE THE ADDED VIEW!!!
 *
 * @param textAnnotation textAnnotation to be modified
 * @param viewName       name of view to be added
 * @return 'true' if textAnnotation was modified; always 'false' for the SENTENCE and TOKENS
 *         views, and 'false' when the view is already present and no update is forced
 * @throws AnnotatorException if no annotator is registered for the requested view
 */
@Override
public boolean addView(TextAnnotation textAnnotation, String viewName) throws AnnotatorException {
    // sentence and token views are never (re)generated through this method
    boolean isBaseView = ViewNames.SENTENCE.equals(viewName) || ViewNames.TOKENS.equals(viewName);
    if (isBaseView) {
        return false;
    }

    // nothing to do when the view already exists and no refresh was requested
    if (textAnnotation.hasView(viewName) && !forceUpdate) {
        return false;
    }

    if (!viewProviders.containsKey(viewName)) {
        throw new AnnotatorException("View '" + viewName + "' cannot be provided by this AnnotatorService.");
    }
    Annotator provider = viewProviders.get(viewName);

    // recursively satisfy prerequisites before running the annotator itself
    for (String requiredView : provider.getRequiredViews()) {
        addView(textAnnotation, requiredView);
    }
    View produced = provider.getView(textAnnotation);
    textAnnotation.addView(provider.getViewName(), produced);

    if (throwExceptionIfNotCached) {
        throwNotCachedException(textAnnotation.getCorpusId(), textAnnotation.getId(), textAnnotation.getText());
    }
    return true;
}
Also used : View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)

Example 3 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class AnnotationFixerTest method testAnnotationFixer.

@Test
public void testAnnotationFixer() {
    TextAnnotation ta = DummyTextAnnotationGenerator.generateAnnotatedTextAnnotation(false, 3);
    View sentenceView = ta.getView(ViewNames.SENTENCE);
    Constituent secondSentence = sentenceView.getConstituents().get(1);
    int boundary = secondSentence.getStartSpan();

    // Baseline: run the fixer against an existing view (which must respect sentence
    // boundaries); the sentence count is expected to stay at three.
    AnnotationFixer.rationalizeBoundaryAnnotations(ta, ViewNames.PSEUDO_PARSE_STANFORD);
    assertEquals(3, ta.getNumberOfSentences());

    // Add a view with one constituent that straddles the start of the second sentence:
    // it begins two tokens before the boundary and ends one token past it.
    View constrainingView = new View(VNAME, VNAME, ta, 1.0);
    ta.addView(VNAME, constrainingView);
    Constituent straddling = new Constituent("CONSTRAINT", VNAME, ta, boundary - 2, boundary + 1);
    constrainingView.addConstituent(straddling);

    // The fixer should now merge the two sentences on either side of that boundary.
    AnnotationFixer.rationalizeBoundaryAnnotations(ta, VNAME);
    assertEquals(2, ta.getNumberOfSentences());
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) Test(org.junit.Test)

Example 4 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class JsonSerializerTest method verifySerializedJSONObject.

/**
 * Behavior specific to unit tests only. Use with caution.
 *
 * Checks a serialized TextAnnotation against its source: the token-offset array must match the
 * token count, the token at index 6 must be recoverable by its character offsets, the POS
 * constituent at index 3 must carry no label-to-score map, and the "rhyme" view's first
 * relation and its source constituent must carry maps with 2 and 4 entries respectively.
 */
public static void verifySerializedJSONObject(JsonObject jobj, TextAnnotation ta) {
    assertNotNull(jobj);

    // token offsets: one JSON entry per token, indexed here by (start, end) character span
    JsonArray tokenOffsetArray = jobj.get(JsonSerializer.TOKENOFFSETS).getAsJsonArray();
    assertNotNull(tokenOffsetArray);
    assertEquals(ta.getTokens().length, tokenOffsetArray.size());
    Map<IntPair, String> formsByCharSpan = new HashMap<>();
    for (int idx = 0; idx < tokenOffsetArray.size(); idx++) {
        JsonObject entry = (JsonObject) tokenOffsetArray.get(idx);
        IntPair charSpan = new IntPair(
                entry.get(JsonSerializer.STARTCHAROFFSET).getAsInt(),
                entry.get(JsonSerializer.ENDCHAROFFSET).getAsInt());
        formsByCharSpan.put(charSpan, entry.get(JsonSerializer.FORM).getAsString());
    }

    // the token at index 6 must be found under its own character offsets
    Constituent tokenAtSix = ta.getView(ViewNames.TOKENS).getConstituents().get(6);
    IntPair tokenSpan = new IntPair(tokenAtSix.getStartCharOffset(), tokenAtSix.getEndCharOffset());
    String expectedForm = tokenAtSix.getSurfaceForm();
    String serializedForm = formsByCharSpan.get(tokenSpan);
    assertNotNull(serializedForm);
    assertEquals(expectedForm, serializedForm);

    // POS constituents are expected to have no label-to-score map
    Constituent posAtThree = ta.getView(ViewNames.POS).getConstituents().get(3);
    assertEquals(null, posAtThree.getLabelsToScores());

    // the "rhyme" view carries multi-label scores on both its relation and its constituents
    View rhymeView = ta.getView("rhyme");
    assertNotNull(rhymeView);
    Relation firstRelation = rhymeView.getRelations().get(0);
    Map<String, Double> relationScores = firstRelation.getLabelsToScores();
    assertNotNull(relationScores);
    assertEquals(2, relationScores.size());

    Constituent relationSource = firstRelation.getSource();
    Map<String, Double> sourceScores = relationSource.getLabelsToScores();
    assertNotNull(sourceScores);
    assertEquals(4, sourceScores.size());
}
Also used : JsonArray(com.google.gson.JsonArray) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) HashMap(java.util.HashMap) JsonObject(com.google.gson.JsonObject) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 5 with View

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.View in project cogcomp-nlp by CogComp.

the class JsonSerializerTest method testSerializerWithCharOffsets.

@Test
public void testSerializerWithCharOffsets() {
    View rhymeView = new View("rhyme", "test", ta, 0.4);

    // label distribution for the first constituent
    String[] labels = { "eeny", "meeny", "miny", "mo" };
    double[] scores = { 0.15, 0.15, 0.3, 0.4 };
    Map<String, Double> labelScores = new TreeMap<>();
    int position = 0;
    for (String label : labels) {
        labelScores.put(label, scores[position]);
        position++;
    }
    Constituent first = new Constituent(labelScores, "rhyme", ta, 2, 4);
    rhymeView.addConstituent(first);

    // Reassign scores in reverse for every label except the first one.
    // No constraint on scores -- they don't have to sum to 1.0.
    for (int i = 1; i < labels.length; ++i) {
        labelScores.put(labels[i], scores[3 - i]);
    }
    Constituent second = new Constituent(labelScores, "rhyme", ta, 2, 4);
    rhymeView.addConstituent(second);

    // a two-label relation linking the two constituents
    Map<String, Double> relationScores = new TreeMap<>();
    relationScores.put("Yes", 0.8);
    relationScores.put("No", 0.2);
    rhymeView.addRelation(new Relation(relationScores, first, second));
    ta.addView("rhyme", rhymeView);

    // serialize, then check the resulting JSON against the annotation
    String serialized = SerializationHelper.serializeToJson(ta, true);
    logger.info(serialized);
    JsonObject parsed = (JsonObject) new JsonParser().parse(serialized);
    JsonSerializerTest.verifySerializedJSONObject(parsed, ta);
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) JsonObject(com.google.gson.JsonObject) TreeMap(java.util.TreeMap) View(edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) JsonParser(com.google.gson.JsonParser) Test(org.junit.Test)

Aggregations

View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)64 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)51 TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)49 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)22 Test (org.junit.Test)21 FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor)16 ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath)16 FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest)16 FileInputStream (java.io.FileInputStream)16 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)7 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)7 ArrayList (java.util.ArrayList)7 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)6 LinkedHashSet (java.util.LinkedHashSet)6 Set (java.util.Set)6 POSBaseLineCounter (edu.illinois.cs.cogcomp.edison.utilities.POSBaseLineCounter)5 POSMikheevCounter (edu.illinois.cs.cogcomp.edison.utilities.POSMikheevCounter)5 IOException (java.io.IOException)5 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)4 JsonObject (com.google.gson.JsonObject)3