Search in sources :

Example 6 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class POSMikheevCounter method buildTableHelper.

/**
     * Builds the suffix tables from a given source corpus file by counting the number of times
     * that each suffix-POS association occurs in that corpus.
     * <p>
     * Only tokens of at least five characters whose last three characters are all letters are
     * counted. Tokens that {@code WordHelpers.isCapitalized} reports as capitalized are counted in
     * {@code firstCapitalized} when they are the first token of their sentence and in
     * {@code notFirstCapitalized} otherwise. All other tokens are counted in {@code table},
     * except those containing a hyphen, which are skipped.
     * <p>
     * For every counted token the lower-cased three-character suffix is recorded; the
     * four-character suffix is recorded as well when the token has at least six characters and
     * the fourth character from the end is a letter.
     * 
     * @param fileName file name of the source corpus
     * @throws Exception if reading the corpus or accessing its annotations fails
     **/
private void buildTableHelper(String fileName) throws Exception {
    PennTreebankPOSReader reader = new PennTreebankPOSReader(this.corpusName);
    reader.readFile(fileName);
    List<TextAnnotation> tas = reader.getTextAnnotations();
    for (TextAnnotation ta : tas) {
        for (int tokenId = 0; tokenId < ta.size(); tokenId++) {
            String form = ta.getToken(tokenId);
            String tag = ((SpanLabelView) ta.getView(ViewNames.POS)).getLabel(tokenId);
            if (form.length() < 5) {
                continue;
            }

            // The last three characters must all be letters for the suffix to be counted.
            boolean allLetters = true;
            for (int i = form.length() - 3; i < form.length() && allLetters; ++i) {
                allLetters = Character.isLetter(form.charAt(i));
            }
            if (!allLetters) {
                continue;
            }

            HashMap<String, TreeMap<String, Integer>> t;
            if (WordHelpers.isCapitalized(ta, tokenId)) {
                int headOfSentence = ta.getSentence(ta.getSentenceId(tokenId)).getStartSpan();
                if (tokenId == headOfSentence) {
                    t = firstCapitalized;
                } else {
                    t = notFirstCapitalized;
                }
            } else {
                // Skip only this token. A `return` here would abandon every remaining token
                // and sentence of the corpus as soon as one hyphenated word is seen.
                if (form.contains("-")) {
                    continue;
                }
                t = table;
            }

            form = form.toLowerCase();
            count(t, form.substring(form.length() - 3), tag);
            if (form.length() >= 6 && Character.isLetter(form.charAt(form.length() - 4))) {
                count(t, form.substring(form.length() - 4), tag);
            }
        }
    }
}
Also used : PennTreebankPOSReader(edu.illinois.cs.cogcomp.nlp.corpusreaders.PennTreebankPOSReader) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) TreeMap(java.util.TreeMap)

Example 7 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class StanfordOpenIEHandler method addView.

/**
 * Runs the Stanford pipeline over the text of {@code ta} and stores every extracted relation
 * triple in a new {@link SpanLabelView} registered under {@code viewName}.
 * <p>
 * Each triple contributes three constituents (subject, object, relation) carrying their gloss
 * attributes, plus two relations, "subj" and "obj", that link the relation constituent to the
 * subject and to the object with the triple's confidence as score.
 *
 * @param ta the text annotation that receives the new view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    Annotation stanfordDoc = new Annotation(ta.text);
    pipeline.annotate(stanfordDoc);

    SpanLabelView openIeView = new SpanLabelView(viewName, ta);
    for (CoreMap stanfordSentence : stanfordDoc.get(CoreAnnotations.SentencesAnnotation.class)) {
        Collection<RelationTriple> extracted =
                stanfordSentence.get(NaturalLogicAnnotations.RelationTriplesAnnotation.class);
        for (RelationTriple rt : extracted) {
            // Subject span and its descriptive attributes.
            Constituent subjConstituent =
                    getConstituent(rt.subjectGloss(), rt.subjectTokenSpan(), stanfordSentence, ta);
            subjConstituent.addAttribute("subjectGloss", rt.subjectGloss());
            subjConstituent.addAttribute("subjectLemmaGloss", rt.subjectLemmaGloss());
            subjConstituent.addAttribute("subjectLink", rt.subjectLink());

            // Object span and its descriptive attributes.
            Constituent objConstituent =
                    getConstituent(rt.objectGloss(), rt.objectTokenSpan(), stanfordSentence, ta);
            objConstituent.addAttribute("objectGloss", rt.objectGloss());
            objConstituent.addAttribute("objectLemmaGloss", rt.objectLemmaGloss());
            objConstituent.addAttribute("objectLink", rt.objectLink());

            // Relation span; it is the source of both edges created below.
            Constituent relConstituent =
                    getConstituent(rt.relationGloss(), rt.relationTokenSpan(), stanfordSentence, ta);
            relConstituent.addAttribute("relationGloss", rt.relationGloss());
            relConstituent.addAttribute("relationLemmaGloss", rt.relationLemmaGloss());

            Relation subjEdge = new Relation("subj", relConstituent, subjConstituent, rt.confidence);
            Relation objEdge = new Relation("obj", relConstituent, objConstituent, rt.confidence);

            openIeView.addRelation(subjEdge);
            openIeView.addRelation(objEdge);
            openIeView.addConstituent(subjConstituent);
            openIeView.addConstituent(objConstituent);
            openIeView.addConstituent(relConstituent);
        }
    }
    ta.addView(viewName, openIeView);
}
Also used : Relation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation) RelationTriple(edu.stanford.nlp.ie.util.RelationTriple) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) NaturalLogicAnnotations(edu.stanford.nlp.naturalli.NaturalLogicAnnotations) CoreMap(edu.stanford.nlp.util.CoreMap) TextAnnotation(edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation) Annotation(edu.stanford.nlp.pipeline.Annotation) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 8 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class PrepSRLAnnotator method addView.

/**
 * Collects preposition candidates from the TOKENS view, classifies each one, and adds a
 * {@link SpanLabelView} named {@code viewName} holding a span label for every candidate whose
 * predicted role differs from {@code DataReader.CANDIDATE}.
 *
 * @param ta the text annotation that receives the new view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
protected void addView(TextAnnotation ta) throws AnnotatorException {
    List<Constituent> prepCandidates = new ArrayList<>();
    for (Constituent token : ta.getView(ViewNames.TOKENS).getConstituents()) {
        int start = token.getStartSpan();

        // Single-token preposition.
        if (PrepSRLDataReader.isPrep(ta, start)) {
            prepCandidates.add(
                    token.cloneForNewViewWithDestinationLabel(viewName, DataReader.CANDIDATE));
        }

        // Two-token preposition starting at this token, if any.
        Constituent bigram = PrepSRLDataReader.isBigramPrep(ta, start, viewName);
        if (bigram != null) {
            prepCandidates.add(bigram);
        }

        // Three-token preposition starting at this token, if any.
        Constituent trigram = PrepSRLDataReader.isTrigramPrep(ta, start, viewName);
        if (trigram != null) {
            prepCandidates.add(trigram);
        }
    }

    SpanLabelView roleView = new SpanLabelView(viewName, viewName + "-annotator", ta, 1.0, true);
    for (Constituent candidate : prepCandidates) {
        String predictedRole = classifier.discreteValue(candidate);
        // A prediction equal to the candidate placeholder label is not written to the view.
        if (predictedRole.equals(DataReader.CANDIDATE)) {
            continue;
        }
        roleView.addSpanLabel(candidate.getStartSpan(), candidate.getEndSpan(), predictedRole, 1.0);
    }
    ta.addView(viewName, roleView);
}
Also used : ArrayList(java.util.ArrayList) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Example 9 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class Quantifier method addView.

/**
 * Adds the {@code ViewNames.QUANTITIES} view to {@code ta}: one span label per
 * {@code QuantSpan} returned by {@code getSpans} for the tokenized text, labelled with the
 * string form of the span's {@code object} and a score of 1.
 * <p>
 * The character offsets of each span are mapped to token indices through
 * {@code ta.getTokenIdFromCharacterOffset}.
 *
 * @param ta the text annotation to annotate; asserted to already contain the SENTENCE view
 * @throws AnnotatorException declared by the overridden method
 */
@Override
public void addView(TextAnnotation ta) throws AnnotatorException {
    assert ta.hasView(ViewNames.SENTENCE);

    SpanLabelView quantities =
            new SpanLabelView(ViewNames.QUANTITIES, "illinois-quantifier", ta, 1d);

    List<QuantSpan> detected = getSpans(ta.getTokenizedText(), true, ta);
    for (QuantSpan quantSpan : detected) {
        quantities.addSpanLabel(
                ta.getTokenIdFromCharacterOffset(quantSpan.start),
                ta.getTokenIdFromCharacterOffset(quantSpan.end),
                quantSpan.object.toString(),
                1d);
    }

    ta.addView(ViewNames.QUANTITIES, quantities);
}
Also used : SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView)

Example 10 with SpanLabelView

use of edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView in project cogcomp-nlp by CogComp.

the class PredicateDetector method getLemma.

/**
 * Decides whether the token at {@code tokenId} is a predicate and, if so, returns its lemma.
 * <p>
 * Tokens whose POS tag is a verb tag other than "AUX" start out as predicates unless they are
 * modal ("xmodal" lemma or "MD" tag) or the contraction "'ve"; a series of context checks on
 * the following tokens can then rule them out. Non-verb tokens starting with "re-" are
 * predicates when the remainder is found in {@code WordNetPlusLemmaViewGenerator.lemmaDict}.
 *
 * @param ta the text annotation containing the token
 * @param tokenId index of the token to inspect
 * @return an {@code Option} holding the (possibly normalized) lemma if the token is a
 *         predicate, otherwise an empty {@code Option}
 */
public Option<String> getLemma(TextAnnotation ta, int tokenId) {
    final String posTag = WordHelpers.getPOS(ta, tokenId);
    final String surface = ta.getToken(tokenId).toLowerCase();
    String lemma = WordHelpers.getLemma(ta, tokenId);
    boolean isPredicate = false;

    if (POSUtils.isPOSVerb(posTag) && !posTag.equals("AUX")) {
        // Normalize the lemma of contracted forms of "be" and of modal-like tokens.
        if (surface.equals("'s") || surface.equals("'re") || surface.equals("'m")) {
            lemma = "be";
        } else if (surface.equals("'d") || lemma.equals("wo") || lemma.equals("'ll")) {
            lemma = "xmodal";
        }

        // A verb is a predicate unless it is a modal or the contraction "'ve".
        isPredicate = !lemma.equals("xmodal") && !posTag.equals("MD") && !surface.equals("'ve");

        final boolean lemmaIsDo = lemma.equals("do");
        final boolean lemmaIsBe = lemma.equals("be");
        final boolean lemmaIsHave = lemma.equals("have");
        final boolean lemmaIsHaveOrDo = lemmaIsHave || lemmaIsDo;

        if (tokenId < ta.size() - 1) {
            final int nextId = tokenId + 1;

            if (lemmaIsBe) {
                // A "be" that is not the last token of a shallow-parse chunk covering it is
                // not treated as a predicate.
                SpanLabelView chunkView = (SpanLabelView) ta.getView(ViewNames.SHALLOW_PARSE);
                for (Constituent covering : chunkView.getConstituentsCoveringToken(tokenId)) {
                    if (covering.getEndSpan() - 1 != tokenId) {
                        isPredicate = false;
                        break;
                    }
                }
            }

            // "have" directly followed by a form of "be"
            if (lemmaIsHave && WordHelpers.getLemma(ta, nextId).equals("be")) {
                isPredicate = false;
            }

            // "have"/"do" directly followed by a verb
            if (lemmaIsHaveOrDo && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, nextId))) {
                isPredicate = false;
            }

            // "according to"
            if (surface.equals("according") && ta.getToken(nextId).toLowerCase().equals("to")) {
                isPredicate = false;
            }
        }

        // "do"/"have" + "n't"/"not" + verb
        if (tokenId < ta.size() - 2 && lemmaIsHaveOrDo) {
            final String following = ta.getToken(tokenId + 1).toLowerCase();
            final boolean isNegation = following.equals("n't") || following.equals("not");
            if (isNegation && POSUtils.isPOSVerb(WordHelpers.getPOS(ta, tokenId + 2))) {
                isPredicate = false;
            }
        }
    } else if (surface.startsWith("re-")) {
        // Strip "re-" and look the remainder up in the lemma dictionary.
        final String stripped = surface.replace("re-", "");
        isPredicate = WordNetPlusLemmaViewGenerator.lemmaDict.contains(stripped);
    }

    if (!isPredicate) {
        return Option.empty();
    }
    return new Option<>(lemma);
}
Also used : Option(edu.illinois.cs.cogcomp.core.datastructures.Option) SpanLabelView(edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView) Constituent(edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent)

Aggregations

SpanLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.SpanLabelView): 24; Constituent (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Constituent): 12; TextAnnotation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TextAnnotation): 12; ArrayList (java.util.ArrayList): 5; IntPair (edu.illinois.cs.cogcomp.core.datastructures.IntPair): 4; Test (org.junit.Test): 3; AnnotatorException (edu.illinois.cs.cogcomp.annotation.AnnotatorException): 2; Relation (edu.illinois.cs.cogcomp.core.datastructures.textannotation.Relation): 2; TokenLabelView (edu.illinois.cs.cogcomp.core.datastructures.textannotation.TokenLabelView): 2; View (edu.illinois.cs.cogcomp.core.datastructures.textannotation.View): 2; ResourceManager (edu.illinois.cs.cogcomp.core.utilities.configuration.ResourceManager): 2; EdisonException (edu.illinois.cs.cogcomp.edison.utilities.EdisonException): 2; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 2; Annotation (edu.stanford.nlp.pipeline.Annotation): 2; CoreMap (edu.stanford.nlp.util.CoreMap): 2; LinkedHashSet (java.util.LinkedHashSet): 2; Properties (java.util.Properties): 2; Comma (edu.illinois.cs.cogcomp.comma.datastructures.Comma): 1; CommaSRLSentence (edu.illinois.cs.cogcomp.comma.datastructures.CommaSRLSentence): 1; Option (edu.illinois.cs.cogcomp.core.datastructures.Option): 1