Search in sources:

Example 11 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

From the class GetParseRightSibling, method transform.

/**
 * Collects the right sibling of the input's parse phrase.
 *
 * <p>The parse phrase for {@code input} is looked up in the view named by
 * {@code parseViewName}. The source of the phrase's first incoming relation is then
 * inspected: among that source's outgoing relations, the target of the relation that
 * directly follows the one pointing at the phrase is returned.
 *
 * @param input the constituent whose right sibling is requested
 * @return a list holding the single right sibling, or an empty list when the phrase has no
 *         incoming relation, is not found among the source's outgoing relations, or is the
 *         last of them
 * @throws RuntimeException wrapping any {@link EdisonException} raised during the lookup;
 *         every other exception is printed to standard error and whatever has been collected
 *         so far is returned
 */
@Override
public List<Constituent> transform(Constituent input) {
    TreeView parseTree = (TreeView) input.getTextAnnotation().getView(parseViewName);
    List<Constituent> rightSibling = new ArrayList<>();
    try {
        Constituent phrase = parseTree.getParsePhrase(input);
        List<Relation> incoming = phrase.getIncomingRelations();
        if (incoming.isEmpty()) {
            return rightSibling;
        }
        List<Relation> childEdges = incoming.get(0).getSource().getOutgoingRelations();
        // Advance to the edge whose target is this very phrase object (identity, not equals).
        int position = 0;
        while (position < childEdges.size() && childEdges.get(position).getTarget() != phrase) {
            position++;
        }
        // When the phrase was not found, position == size and the check below fails as well.
        int next = position + 1;
        if (next < childEdges.size()) {
            rightSibling.add(childEdges.get(next).getTarget());
        }
    } catch (EdisonException e) {
        throw new RuntimeException(e);
    } catch (Exception e) {
        e.printStackTrace();
    }
    return rightSibling;
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) ArrayList(java.util.ArrayList) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 12 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

From the class LabelOneBefore, method getFeatures.

/**
 * Emits one feature per token position in the window obtained by shifting the span of
 * {@code c} one token to the left, i.e. positions {@code startSpan - 1} (inclusive) up to
 * {@code endSpan - 1} (exclusive).
 *
 * <p>The feature name prefix depends on two flags read from the enclosing object:
 * <ul>
 *   <li>{@code isPOSFromCounting} false: prefix {@code LabelOneBefore_POS}, tag taken from the
 *       {@code ViewNames.POS} view;</li>
 *   <li>otherwise, {@code isBaseLineCounting} true: prefix {@code LabelOneBefore_BaselinePOS},
 *       tag taken from {@code counter.tag};</li>
 *   <li>otherwise: prefix {@code LabelOneBefore_MikheevPOS}, tag taken from
 *       {@code counter.tag}.</li>
 * </ul>
 * A negative position (before the first token) yields a feature with an empty value.
 *
 * @param c the constituent whose preceding context is described
 * @return the features in insertion order
 * @throws EdisonException declared by the overridden method; not thrown directly here
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    final String prefix = "LabelOneBefore";
    TextAnnotation ta = c.getTextAnnotation();
    int first = c.getStartSpan() - 1;
    int limit = c.getEndSpan() - 1;
    Set<Feature> features = new LinkedHashSet<>();
    for (int idx = first; idx < limit; idx++) {
        boolean fromPosView = !isPOSFromCounting;

        // Choose the classifier name for this position.
        String classifier;
        if (fromPosView) {
            classifier = prefix + "_" + "POS";
        } else if (isBaseLineCounting) {
            classifier = prefix + "_" + "BaselinePOS";
        } else {
            classifier = prefix + "_" + "MikheevPOS";
        }

        // Positions before the start of the text have no token: emit the empty marker.
        if (idx < 0) {
            features.add(new DiscreteFeature(classifier + ":" + ""));
            continue;
        }

        String form;
        String tag;
        if (fromPosView) {
            TokenLabelView posView = (TokenLabelView) ta.getView(ViewNames.POS);
            form = ta.getToken(idx);
            tag = posView.getLabel(idx);
        } else {
            form = ta.getToken(idx);
            tag = counter.tag(idx, ta);
        }
        features.add(new DiscreteFeature(classifier + ":" + tag + "_" + form));
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TokenLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature)

Example 13 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

From the class DependencyPath, method getFeatures.

/**
 * Describes the path, in the view named by {@code dependencyViewName}, between two nodes:
 * the node covering the start token of the source of {@code c}'s first incoming relation,
 * and the node covering the start token of {@code c} itself.
 *
 * <p>Three features are produced:
 * <ol>
 *   <li>a discrete feature with the relation names along the upward leg, each followed by
 *       {@code PathFeatureHelper.PATH_UP_STRING}, then {@code *}, then one
 *       {@code PathFeatureHelper.PATH_DOWN_STRING} per step of the downward leg;</li>
 *   <li>a discrete feature prefixed with {@code pos} that interleaves the same markers with
 *       the POS tag of every node visited;</li>
 *   <li>a real-valued feature named {@code l} holding the combined size of both legs
 *       minus one.</li>
 * </ol>
 *
 * @param c the constituent; it must have at least one incoming relation, otherwise the
 *          {@code get(0)} lookup fails
 * @return the three features in insertion order
 * @throws EdisonException declared by the overridden method; may come from the helpers called
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TextAnnotation ta = c.getTextAnnotation();
    TreeView dependencyTree = (TreeView) ta.getView(dependencyViewName);

    int sourceToken = c.getIncomingRelations().get(0).getSource().getStartSpan();
    Constituent sourceNode = dependencyTree.getConstituentsCoveringToken(sourceToken).get(0);
    Constituent ownNode = dependencyTree.getConstituentsCoveringToken(c.getStartSpan()).get(0);

    Pair<List<Constituent>, List<Constituent>> paths =
            PathFeatureHelper.getPathsToCommonAncestor(sourceNode, ownNode, 400);
    List<Constituent> upward = paths.getFirst();
    List<Constituent> downward = paths.getSecond();
    int length = upward.size() + downward.size() - 1;

    StringBuilder path = new StringBuilder();
    StringBuilder pos = new StringBuilder();

    // Upward leg: every node except the last one, which is handled separately below.
    int topIndex = upward.size() - 1;
    for (int step = 0; step < topIndex; step++) {
        Constituent node = upward.get(step);
        String relation = node.getIncomingRelations().get(0).getRelationName();
        path.append(relation).append(PathFeatureHelper.PATH_UP_STRING);
        pos.append(WordHelpers.getPOS(ta, node.getStartSpan()));
        pos.append(relation).append(PathFeatureHelper.PATH_UP_STRING);
    }

    Constituent top = upward.get(topIndex);
    pos.append(WordHelpers.getPOS(ta, top.getStartSpan()));
    pos.append("*");
    path.append("*");

    // Downward leg, walked from the second-to-last element back to the first; the loop
    // bounds already skip lists with fewer than two elements.
    for (int step = downward.size() - 2; step >= 0; step--) {
        Constituent node = downward.get(step);
        pos.append(WordHelpers.getPOS(ta, node.getStartSpan()));
        pos.append(PathFeatureHelper.PATH_DOWN_STRING);
        path.append(PathFeatureHelper.PATH_DOWN_STRING);
    }

    Set<Feature> features = new LinkedHashSet<>();
    features.add(DiscreteFeature.create(path.toString()));
    features.add(DiscreteFeature.create("pos" + pos));
    features.add(RealFeature.create("l", length));
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) List(java.util.List) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) RealFeature(edu.illinois.cs.cogcomp.edison.features.RealFeature) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 14 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

From the class NomLexClassFeature, method getFeatures.

/**
 * Produces NomLex-based features for the last token of {@code c}.
 *
 * <p>The lower-cased, trimmed last token and its lemma are looked up through
 * {@code NomLexReader}. The lemma is taken from the constituent's
 * {@code PredicateArgumentView.LemmaIdentifier} attribute when present, and from
 * {@code WordHelpers.getLemma} otherwise. For each entry returned, a {@code nom-cls:} feature
 * is added; entries whose class is in {@code NomLexEntry.VERBAL} additionally add
 * {@code DE_VERBAL} and a {@code nom-vb:} feature, and entries whose class is in
 * {@code NomLexEntry.ADJECTIVAL} add {@code DE_ADJECTIVAL} and a {@code nom-adj:} feature.
 *
 * @param c the constituent whose last token is looked up
 * @return the features in insertion order; empty when no entry is returned
 * @throws EdisonException declared by the overridden method; may come from the helpers called
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    int lastToken = c.getEndSpan() - 1;
    TextAnnotation ta = c.getTextAnnotation();
    String surface = ta.getToken(lastToken).toLowerCase().trim();
    String lemma = c.hasAttribute(PredicateArgumentView.LemmaIdentifier)
            ? c.getAttribute(PredicateArgumentView.LemmaIdentifier)
            : WordHelpers.getLemma(ta, lastToken);

    List<NomLexEntry> entries = NomLexReader.getInstance().getNomLexEntries(surface, lemma);

    Set<Feature> features = new LinkedHashSet<>();
    for (NomLexEntry entry : entries) {
        features.add(DiscreteFeature.create("nom-cls:" + entry.nomClass));
        // The verbal check takes precedence; the adjectival one is only tried when it fails.
        if (NomLexEntry.VERBAL.contains(entry.nomClass)) {
            features.add(DE_VERBAL);
            features.add(DiscreteFeature.create("nom-vb:" + entry.verb));
        } else if (NomLexEntry.ADJECTIVAL.contains(entry.nomClass)) {
            features.add(DE_ADJECTIVAL);
            features.add(DiscreteFeature.create("nom-adj:" + entry.adj));
        }
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) NomLexEntry(edu.illinois.cs.cogcomp.edison.utilities.NomLexEntry) NomLexReader(edu.illinois.cs.cogcomp.edison.utilities.NomLexReader) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)

Example 15 with TextAnnotation

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation in project cogcomp-nlp by CogComp.

From the class ParseLabelIdentifier, method getFeatures.

/**
 * Emits a single discrete feature when the parse phrase of {@code c} carries a label that
 * {@code isLabelValid} accepts.
 *
 * <p>The phrase is looked up in the view named by {@code parseViewName}. Note that the
 * emitted feature is built from {@code label}, which is defined outside this method
 * (presumably a field of the enclosing class), and not from the phrase label that was tested.
 *
 * @param c the constituent whose parse phrase label is checked
 * @return a set with one feature when the label is accepted, otherwise an empty set
 * @throws EdisonException wrapping any exception raised while fetching the phrase label
 */
@Override
public Set<Feature> getFeatures(Constituent c) throws EdisonException {
    TreeView parseTree = (TreeView) c.getTextAnnotation().getView(parseViewName);

    String phraseLabel;
    try {
        phraseLabel = parseTree.getParsePhrase(c).getLabel();
    } catch (Exception e) {
        throw new EdisonException(e);
    }

    Set<Feature> features = new LinkedHashSet<>();
    if (isLabelValid(phraseLabel)) {
        features.add(DiscreteFeature.create(label));
    }
    return features;
}
Also used : LinkedHashSet(java.util.LinkedHashSet) TreeView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException) DiscreteFeature(edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) Feature(edu.illinois.cs.cogcomp.edison.features.Feature) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Aggregations

TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation)292 Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)121 Test (org.junit.Test)84 View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View)60 Feature (edu.illinois.cs.cogcomp.edison.features.Feature)48 AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException)40 ArrayList (java.util.ArrayList)33 TokenizerTextAnnotationBuilder (edu.illinois.cs.cogcomp.nlp.utility.TokenizerTextAnnotationBuilder)32 DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature)28 TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView)27 IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair)24 EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException)22 IOException (java.io.IOException)22 LinkedHashSet (java.util.LinkedHashSet)21 SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)20 StatefulTokenizer (edu.illinois.cs.cogcomp.nlp.tokenizer.StatefulTokenizer)19 PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView)18 Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation)18 File (java.io.File)18 XmlTextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.XmlTextAnnotation)16