Search in sources :

Example 71 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class FileIOAnnotationJob method getNERString.

/**
 * Renders the annotated text in bracket notation.
 *
 * <p>The constituents of {@code view} are visited in the order given by
 * {@code TextAnnotationUtilities.constituentStartComparator}. Raw text between
 * constituents is copied verbatim from {@code textAnnotation.getText()}, and each
 * constituent is emitted as {@code "[LABEL tokenized surface form ] "}. Text that
 * follows the last constituent is appended unchanged, so the whole document is
 * represented in the result even when the view contains no constituents.
 *
 * @return the text with every constituent wrapped in a labelled bracket
 */
protected String getNERString() {
    List<Constituent> constituents = new ArrayList<>(view.getConstituents());
    Collections.sort(constituents, TextAnnotationUtilities.constituentStartComparator);
    StringBuilder sb = new StringBuilder();
    String text = textAnnotation.getText();
    // Character offset up to which the raw text has already been consumed.
    int where = 0;
    for (Constituent c : constituents) {
        // Copy the un-annotated gap between the previous constituent and this one.
        String gap = text.substring(where, c.getStartCharOffset());
        sb.append(gap)
                .append("[")
                .append(c.getLabel())
                .append(" ")
                .append(c.getTokenizedSurfaceForm())
                .append(" ] ");
        where = c.getEndCharOffset();
    }
    // Without this, everything after the final constituent would be silently dropped.
    sb.append(text.substring(where));
    return sb.toString();
}
Also used : ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 72 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class LBJavaFeatureExtractor method classify.

/**
 * Extracts the features of a single constituent as an LBJava feature vector.
 *
 * <p>Extraction is best-effort: if computing or converting the features throws,
 * the failure is logged at debug level (including the exception itself) and an
 * empty {@link FeatureVector} is returned instead of propagating the error.
 *
 * @param o the instance to classify; must be a {@link Constituent}
 * @return the features of {@code o}, or an empty vector if extraction failed
 * @throws IllegalArgumentException if {@code o} is not a {@link Constituent}
 */
@Override
public FeatureVector classify(Object o) {
    // Make sure the object is a Constituent
    if (!(o instanceof Constituent)) {
        throw new IllegalArgumentException("Instance must be of type Constituent");
    }
    Constituent instance = (Constituent) o;
    try {
        return FeatureUtilities.getLBJFeatures(getFeatures(instance));
    } catch (Exception e) {
        // Deliberately non-fatal, but pass the exception as the trailing argument so
        // the cause of the failure is recorded rather than silently discarded.
        logger.debug("Couldn't generate feature {} for constituent {}", getName(), instance, e);
        return new FeatureVector();
    }
}
Also used : FeatureVector(edu.illinois.cs.cogcomp.lbjava.classify.FeatureVector) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) EdisonException(edu.illinois.cs.cogcomp.edison.utilities.EdisonException)

Example 73 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class StanfordParseHandler method checkLength.

/**
 * Optionally enforces the sentence-length limit on a text annotation.
 *
 * <p>When {@code throwExceptionOnSentenceLengthCheck} is {@code false} this method does
 * nothing. Otherwise it delegates to
 * {@code HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit}; if that call
 * returns a non-null constituent, an error message built from the annotation id, the
 * constituent's surface form and the limit is logged and thrown.
 *
 * @param textAnnotation the annotation to check
 * @param throwExceptionOnSentenceLengthCheck whether the check is performed at all
 * @param maxParseSentenceLength the limit passed to the length check
 * @throws AnnotatorException if the check is enabled and reports a constituent
 */
static void checkLength(TextAnnotation textAnnotation, boolean throwExceptionOnSentenceLengthCheck, int maxParseSentenceLength) throws AnnotatorException {
    if (!throwExceptionOnSentenceLengthCheck) {
        return;
    }
    Constituent reported = HandlerUtils.checkTextAnnotationRespectsSentenceLengthLimit(textAnnotation, maxParseSentenceLength);
    if (reported == null) {
        return;
    }
    String errorMessage = HandlerUtils.getSentenceLengthError(textAnnotation.getId(), reported.getSurfaceForm(), maxParseSentenceLength);
    logger.error(errorMessage);
    throw new AnnotatorException(errorMessage);
}
Also used : AnnotatorException(edu.illinois.cs.cogcomp.annotation.AnnotatorException) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 74 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class PredicateDetector method getLemma.

/**
 * Decides whether the token at {@code tokenId} is a predicate and, if so, returns its lemma.
 *
 * <p>A token is a candidate when its POS tag is a verb tag other than {@code "AUX"}. The
 * candidate is then rejected for modals, for {@code 've}, and for several auxiliary-like
 * uses of "be", "have" and "do" that depend on the following one or two tokens. A token
 * that is not a verb candidate but starts with {@code "re-"} is accepted when the remainder
 * is found in {@code WordNetPlusLemmaViewGenerator.lemmaDict}.
 *
 * @param ta the text annotation containing the token
 * @param tokenId index of the token within {@code ta}
 * @return an {@code Option} holding the (possibly normalized) lemma if the token is a
 *         predicate, otherwise {@code Option.empty()}
 */
public Option<String> getLemma(TextAnnotation ta, int tokenId) {
    String pos = WordHelpers.getPOS(ta, tokenId);
    String token = ta.getToken(tokenId).toLowerCase();
    String lemma = WordHelpers.getLemma(ta, tokenId);
    boolean predicate = false;
    // any token that is a verb is a predicate
    if (POSUtils.isPOSVerb(pos) && !pos.equals("AUX")) {
        // Normalize contracted forms: 's / 're / 'm become "be"; 'd, "wo" (as in "won't")
        // and 'll become the placeholder lemma "xmodal".
        if (token.equals("'s") || token.equals("'re") || token.equals("'m"))
            lemma = "be";
        else if (token.equals("'d") || lemma.equals("wo") || lemma.equals("'ll"))
            lemma = "xmodal";
        // Modals (by normalized lemma or by MD tag) and 've are never predicates.
        predicate = !(lemma.equals("xmodal") || pos.equals("MD") || token.equals("'ve"));
        // ignore all instances of has + "to be" if they are followed by a
        // verb or if the token is "be" followed by a verb
        boolean doVerb = lemma.equals("do");
        boolean be = lemma.equals("be");
        boolean have = lemma.equals("have");
        // The checks in this block look at token tokenId + 1, so they require a next token.
        if (tokenId < ta.size() - 1) {
            if (be) {
                SpanLabelView chunk = (SpanLabelView) ta.getView(ViewNames.SHALLOW_PARSE);
                for (Constituent c : chunk.getConstituentsCoveringToken(tokenId)) {
                    // if the covering chunk does not end at this token, the chunk continues
                    // past it; presumably that means there is another verb here
                    if (c.getEndSpan() - 1 != tokenId) {
                        predicate = false;
                        break;
                    }
                }
            }
            // ignore "have + be"
            if (have && WordHelpers.getLemma(ta, tokenId + 1).equals("be")) {
                predicate = false;
            }
            // ignore "have/do + verb"
            if ((have || doVerb) && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 1)))
                predicate = false;
            // ignore "according to"
            if (token.equals("according") && ta.getToken(tokenId + 1).toLowerCase().equals("to"))
                predicate = false;
        }
        // The check in this block looks at token tokenId + 2, so it requires two more tokens.
        if (tokenId < ta.size() - 2) {
            // ignore don't + V or haven't + V
            if (doVerb || have) {
                String nextToken = ta.getToken(tokenId + 1).toLowerCase();
                if ((nextToken.equals("n't") || nextToken.equals("not")) && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 2)))
                    predicate = false;
            }
        }
    } else if (token.startsWith("re-")) {
        // Not a verb candidate: accept "re-X" when X is a known lemma.
        // NOTE(review): replace() removes every "re-" occurrence in the token, not only the
        // leading prefix -- confirm this is intended.
        String trim = token.replace("re-", "");
        predicate = WordNetPlusLemmaViewGenerator.lemmaDict.contains(trim);
    }
    if (predicate) {
        return new Option<>(lemma);
    } else {
        return Option.empty();
    }
}
Also used : Option(edu.illinois.cs.cogcomp.core.datastructures.Option) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 75 with Constituent

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent in project cogcomp-nlp by CogComp.

the class PredicateDetector method getPredicates.

/**
 * Collects a single-token constituent for every token that {@code getLemma} accepts.
 *
 * <p>Each returned constituent spans token {@code i} to {@code i + 1}, is created with
 * empty strings for its first two constructor arguments, and carries the lemma under
 * the attribute key {@code PredicateArgumentView.LemmaIdentifier}.
 *
 * @param ta the text annotation whose tokens are scanned
 * @return the predicate constituents, in token order
 * @throws Exception declared by the signature; not thrown directly by this body
 */
public List<Constituent> getPredicates(TextAnnotation ta) throws Exception {
    final List<Constituent> predicates = new ArrayList<>();
    for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
        Option<String> lemma = getLemma(ta, tokenId);
        if (!lemma.isPresent()) {
            // Not a predicate; nothing to record for this token.
            continue;
        }
        Constituent predicate = new Constituent("", "", ta, tokenId, tokenId + 1);
        predicate.addAttribute(PredicateArgumentView.LemmaIdentifier, lemma.get());
        predicates.add(predicate);
    }
    return predicates;
}
Also used : ArrayList(java.util.ArrayList) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent) 176, TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) 95, View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View) 51, Feature (edu.illinois.cs.cogcomp.edison.features.Feature) 44, Test (org.junit.Test) 39, ArrayList (java.util.ArrayList) 29, Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) 25, EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException) 24, LinkedHashSet (java.util.LinkedHashSet) 22, TreeView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TreeView) 20, DiscreteFeature (edu.illinois.cs.cogcomp.edison.features.DiscreteFeature) 20, FeatureExtractor (edu.illinois.cs.cogcomp.edison.features.FeatureExtractor) 17, ProjectedPath (edu.illinois.cs.cogcomp.edison.features.lrec.ProjectedPath) 16, FeatureManifest (edu.illinois.cs.cogcomp.edison.features.manifest.FeatureManifest) 16, FileInputStream (java.io.FileInputStream) 16, IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair) 14, PredicateArgumentView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.PredicateArgumentView) 13, SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) 12, HashSet (java.util.HashSet) 12, AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException) 11