Search in sources :

Example 16 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ParsePhraseType method getFeatures.

/**
 * Builds phrase-type features for the parse phrase of the given constituent.
 *
 * <p>When a parse phrase is found, the result holds the phrase label and a {@code pt:} feature
 * carrying the parent's label ({@code ROOT} if the phrase has no incoming relation). When a
 * parent exists, the lower-cased head word of the parent ({@code pt:h:}) and its part of speech
 * ({@code pt:h-pos:}) are added as well.
 *
 * @param c the constituent whose parse phrase is inspected
 * @return the extracted features in insertion order; empty if no parse phrase is available
 * @throws EdisonException if looking up the parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parseTree = (TreeView) ta.getView(parseViewname);

    Constituent phrase;
    try {
        phrase = parseTree.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Set<Feature> result = new LinkedHashSet<>();
    if (phrase == null) {
        // Nothing to describe without a parse phrase.
        return result;
    }

    result.add(DiscreteFeature.create(phrase.getLabel()));

    String parentLabel = "ROOT";
    if (!phrase.getIncomingRelations().isEmpty()) {
        Constituent parentNode = phrase.getIncomingRelations().get(0).getSource();
        parentLabel = parentNode.getLabel();

        int headIndex = CollinsHeadFinder.getInstance().getHeadWordPosition(parentNode);
        String headToken = ta.getToken(headIndex).toLowerCase().trim();
        String headPos = WordHelpers.getPOS(ta, headIndex);

        result.add(DiscreteFeature.create("pt:h:" + headToken));
        result.add(DiscreteFeature.create("pt:h-pos:" + headPos));
    }

    // The parent label feature always comes last, after any head-word features.
    result.add(DiscreteFeature.create("pt:" + parentLabel));
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 17 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ParseSiblings method getFeatures.

/**
 * Builds features describing where the parse phrase of the given constituent sits among its
 * siblings, and what its immediate left and right siblings look like.
 *
 * <p>If the phrase has no incoming relation, the single feature {@code ONLY_CHILD} is emitted.
 * Otherwise {@code FIRST_CHILD} or {@code LAST_CHILD} is emitted when the phrase is at the
 * corresponding end of its parent's outgoing relations, followed by {@code lsis.*} features
 * for the left sibling (if any) and {@code rsis.*} features for the right sibling (if any).
 *
 * @param c the constituent whose parse phrase is inspected
 * @return the extracted features in insertion order; empty if no parse phrase is available
 * @throws EdisonException if looking up the parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);
    Constituent phrase;
    try {
        phrase = parse.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }
    Set<Feature> features = new LinkedHashSet<>();
    if (phrase == null) {
        // No parse phrase covers the constituent: report no features instead of failing
        // with a NullPointerException on the dereference below.
        return features;
    }
    if (phrase.getIncomingRelations().size() == 0) {
        features.add(DiscreteFeature.create("ONLY_CHILD"));
        return features;
    }

    Relation incomingEdge = phrase.getIncomingRelations().get(0);
    Constituent parent = incomingEdge.getSource();
    int siblingCount = parent.getOutgoingRelations().size();

    // Locate this phrase among the parent's children by edge identity.
    int position = -1;
    for (int i = 0; i < siblingCount; i++) {
        if (parent.getOutgoingRelations().get(i) == incomingEdge) {
            position = i;
            break;
        }
    }
    assert position >= 0;

    if (position == 0)
        features.add(DiscreteFeature.create("FIRST_CHILD"));
    else if (position == siblingCount - 1)
        features.add(DiscreteFeature.create("LAST_CHILD"));

    if (position != 0) {
        Constituent leftSibling = parent.getOutgoingRelations().get(position - 1).getTarget();
        addSiblingFeatures(features, ta, leftSibling, "lsis");
    }
    if (position != siblingCount - 1) {
        Constituent rightSibling = parent.getOutgoingRelations().get(position + 1).getTarget();
        addSiblingFeatures(features, ta, rightSibling, "rsis");
    }
    return features;
}

/**
 * Adds the phrase type, lower-cased head word and head-word part of speech of {@code sibling}
 * to {@code features}, each feature name starting with {@code prefix}.
 */
private static void addSiblingFeatures(Set<Feature> features, TextAnnotation ta, Constituent sibling,
        String prefix) throws EdisonException {
    String phraseType = sibling.getLabel();
    int headWord = CollinsHeadFinder.getInstance().getHeadWordPosition(sibling);
    String token = ta.getToken(headWord).toLowerCase().trim();
    String pos = WordHelpers.getPOS(ta, headWord);
    features.add(DiscreteFeature.create(prefix + ".pt:" + phraseType));
    features.add(DiscreteFeature.create(prefix + ".hw:" + token));
    features.add(DiscreteFeature.create(prefix + ".hw.pos:" + pos));
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 18 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class SyntacticFrame method getFeatures.

/**
 * Builds syntactic-frame features for an argument constituent relative to its predicate.
 *
 * <p>The predicate is taken from the source of the constituent's first incoming relation. Three
 * strings are assembled in parallel: first from the siblings that precede the parent of the
 * predicate's parse phrase, then from that parent's own children. A child whose span equals the
 * predicate's span contributes {@code "v-"} to the first two strings and the lemma at its start
 * token followed by {@code "-"} to the third; every other node is handed to
 * {@code addToFeature}. The strings are emitted unprefixed, with {@code general:} and with
 * {@code lemma:} respectively.
 *
 * @param c the argument constituent
 * @return the three frame features, or an empty set if {@code c} has no incoming relation
 * @throws EdisonException if looking up either parse phrase fails
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    Set<Feature> result = new LinkedHashSet<>();
    List<Relation> incoming = c.getIncomingRelations();
    if (incoming.isEmpty()) {
        return result;
    }

    Constituent predicateNode = incoming.get(0).getSource();
    TextAnnotation ta = c.getTextAnnotation();
    TreeView parse = (TreeView) ta.getView(parseViewName);

    Constituent predicate;
    Constituent arg;
    try {
        predicate = parse.getParsePhrase(predicateNode);
        arg = parse.getParsePhrase(c);
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Constituent vp = TreeView.getParent(predicate);
    StringBuffer frame = new StringBuffer();
    StringBuffer generalFrame = new StringBuffer();
    StringBuffer lemmaFrame = new StringBuffer();

    // Siblings of the VP that come before it, in order.
    if (!TreeView.isRoot(vp)) {
        for (Relation edge : TreeView.getParent(vp).getOutgoingRelations()) {
            Constituent sibling = edge.getTarget();
            if (sibling == vp) {
                break;
            }
            addToFeature(sibling, arg, frame, generalFrame, lemmaFrame);
        }
    }

    // Children of the VP itself; the predicate is marked specially.
    for (Relation edge : vp.getOutgoingRelations()) {
        Constituent child = edge.getTarget();
        if (!child.getSpan().equals(predicate.getSpan())) {
            addToFeature(child, arg, frame, generalFrame, lemmaFrame);
            continue;
        }
        frame.append("v-");
        generalFrame.append("v-");
        lemmaFrame.append(WordHelpers.getLemma(ta, child.getStartSpan())).append("-");
    }

    result.add(DiscreteFeature.create(frame.toString()));
    result.add(DiscreteFeature.create("general:" + generalFrame.toString()));
    result.add(DiscreteFeature.create("lemma:" + lemmaFrame.toString()));
    return result;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 19 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ParseHelper method getPhraseFromHead.

/**
 * Primarily a fix for prepSRL objects; converts them from single head words to constituents.
 * E.g. for the sentence "the man with the telescope", the object of the preposition will be
 * "the telescope" instead of just "telescope".
 *
 * @param predicate The predicate of the construction (e.g. "with")
 * @param argHead The head-word of the argument of the construction (e.g. "telescope")
 * @param parseViewName The name of the parse view used to extract the phrase-structure tree
 * @return The full constituent phrase containing the argument head, or {@code null} if the tree
 *         node covering the predicate has no parent, if the yield of that parent contains no
 *         leaf starting at the argument head, or if walking up from the argument reaches the
 *         top of the tree
 */
public static Constituent getPhraseFromHead(Constituent predicate, Constituent argHead, String parseViewName) {
    // Get the path from the argument to the preposition
    // but only if the predicate node "m-commands" the arg
    TextAnnotation ta = argHead.getTextAnnotation();
    int sentenceOffset = ta.getSentence(ta.getSentenceId(argHead)).getStartSpan();
    int argStart = argHead.getStartSpan() - sentenceOffset;
    Tree<Pair<String, IntPair>> predParentTree = getTokenIndexedTreeCovering(predicate, parseViewName).getParent();
    // A node without a parent cannot dominate the argument through its parent; bail out
    // the same way as the other failure paths instead of dereferencing null below.
    if (predParentTree == null)
        return null;
    boolean found = false;
    for (Tree<Pair<String, IntPair>> s : predParentTree.getYield()) {
        if (s.getLabel().getSecond().getFirst() == argStart) {
            found = true;
            // One match is enough; no need to scan the rest of the yield.
            break;
        }
    }
    if (!found)
        return null;
    // Now follow the path from the argument node to get to the preposition
    Tree<Pair<String, IntPair>> argPhrase = getTokenIndexedTreeCovering(argHead, parseViewName);
    while (!checkForPredicate(argPhrase.getParent(), predicate.getStartSpan() - sentenceOffset)) {
        if (argPhrase.getParent() == null)
            break;
        argPhrase = argPhrase.getParent();
    }
    // If the phrase covering the constituent is the whole sentence then the annotation is wrong
    if (argPhrase.getParent() == null)
        return null;
    // The resulting span starts at the token right after the predicate's first token and is
    // as long as the number of leaves under the argument phrase.
    int start = predicate.getStartSpan() + 1;
    int end = start + argPhrase.getYield().size();
    return new Constituent(argHead.getLabel(), argHead.getViewName(), ta, start, end);
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) IntPair(edu.illinois.cs.cogcomp.core.datastructures.IntPair) Pair(edu.illinois.cs.cogcomp.core.datastructures.Pair)

Example 20 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

the class ParseHelper method getParseTreeCovering.

/**
 * Returns the part of a sentence's parse tree that covers a constituent.
 *
 * <p>The parse tree of the sentence containing {@code c} is fetched from the named view, and
 * the constituent's span is shifted by the sentence's start span before the covering subtree
 * is looked up.
 *
 * @param parseViewName The name of the parse view
 * @param c The constituent of interest
 * @return The portion of the parse tree of the {@link TextAnnotation} to which the constituent
 *         belongs which covers the constituent.
 */
public static Tree<String> getParseTreeCovering(String parseViewName, Constituent c) {
    final TextAnnotation annotation = c.getTextAnnotation();
    final int sentenceIndex = annotation.getSentenceId(c);
    final Tree<String> sentenceTree = getParseTree(parseViewName, annotation, sentenceIndex);
    // Spans on the constituent are shifted by the sentence start before the lookup.
    final int offset = annotation.getSentence(sentenceIndex).getStartSpan();
    final int relativeStart = c.getStartSpan() - offset;
    final int relativeEnd = c.getEndSpan() - offset;
    return getTreeCovering(sentenceTree, relativeStart, relativeEnd);
}
Also used : TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)292 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)121 Test (org.junit.Test)84 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)60 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)40 ArrayList (java.util.ArrayList)33 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)32 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)27 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)24 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 IOException (java.io.IOException)22 LinkedHashSet (java.util.LinkedHashSet)21 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)20 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)19 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)18 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)18 File (java.io.File)18 XmlTextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation)16