Search in sources :

Example 76 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class CoNLLUDocumentWriter method printSemanticGraph.

/**
 * Renders a dependency graph as CoNLL-U text.
 *
 * <p>The output consists of the graph's comment lines, then one ten-column, tab-separated row
 * per vertex (in the order given by {@code sg.vertexListSorted()}), then one empty line. A token
 * carrying a {@code CoNLLUTokenSpanAnnotation} whose span starts at that token's index is
 * preceded by a multiword-token range row.
 *
 * <p>Every line is terminated with a single {@code '\n'} regardless of platform, so that comment
 * lines, token rows and the closing blank line all use the same line break.
 *
 * @param sg the graph to serialize
 * @param unescapeParenthesis if {@code true}, matches of {@code LRB_PATTERN} and
 *     {@code RRB_PATTERN} in the word form and the lemma are replaced by "(" and ")"
 * @return the CoNLL-U representation of {@code sg}, ending with a blank line
 */
public String printSemanticGraph(SemanticGraph sg, boolean unescapeParenthesis) {
    StringBuilder sb = new StringBuilder();
    /* Print comments. */
    for (String comment : sg.getComments()) {
        sb.append(comment).append('\n');
    }
    for (IndexedWord token : sg.vertexListSorted()) {
        /* Check for multiword tokens. */
        if (token.containsKey(CoreAnnotations.CoNLLUTokenSpanAnnotation.class)) {
            IntPair tokenSpan = token.get(CoreAnnotations.CoNLLUTokenSpanAnnotation.class);
            /* Only the first token of the span emits the range row. */
            if (tokenSpan.getSource() == token.index()) {
                String range = String.format("%d-%d", tokenSpan.getSource(), tokenSpan.getTarget());
                sb.append(String.format("%s\t%s\t_\t_\t_\t_\t_\t_\t_\t_\n", range, token.originalText()));
            }
        }
        /* Try to find main governor and additional dependencies. */
        int govIdx = -1;
        GrammaticalRelation reln = null;
        HashMap<Integer, String> additionalDeps = new HashMap<>();
        for (IndexedWord parent : sg.getParents(token)) {
            SemanticGraphEdge edge = sg.getEdge(parent, token);
            if (govIdx == -1 && !edge.isExtra()) {
                /* The first non-extra incoming edge supplies the head and relation columns. */
                govIdx = parent.index();
                reln = edge.getRelation();
            } else {
                /* Every other incoming edge is reported as an additional dependency. */
                additionalDeps.put(parent.index(), edge.getRelation().toString());
            }
        }
        String additionalDepsString = CoNLLUUtils.toExtraDepsString(additionalDeps);
        String word = token.word();
        String featuresString = CoNLLUUtils.toFeatureString(token.get(CoreAnnotations.CoNLLUFeats.class));
        /* Missing annotations are written as the placeholder "_". */
        String pos = token.getString(CoreAnnotations.PartOfSpeechAnnotation.class, "_");
        String upos = token.getString(CoreAnnotations.CoarseTagAnnotation.class, "_");
        String misc = token.getString(CoreAnnotations.CoNLLUMisc.class, "_");
        String lemma = token.getString(CoreAnnotations.LemmaAnnotation.class, "_");
        String relnName = reln == null ? "_" : reln.toString();
        /* Root. */
        if (govIdx == -1 && sg.getRoots().contains(token)) {
            govIdx = 0;
            relnName = GrammaticalRelation.ROOT.toString();
        }
        if (unescapeParenthesis) {
            word = word.replaceAll(LRB_PATTERN, "(");
            word = word.replaceAll(RRB_PATTERN, ")");
            lemma = lemma.replaceAll(LRB_PATTERN, "(");
            lemma = lemma.replaceAll(RRB_PATTERN, ")");
        }
        /* Use "\n" rather than "%n" so rows match the line break used for comments and the final blank line. */
        sb.append(String.format("%d\t%s\t%s\t%s\t%s\t%s\t%d\t%s\t%s\t%s\n", token.index(), word, lemma, upos, pos, featuresString, govIdx, relnName, additionalDepsString, misc));
    }
    sb.append('\n');
    return sb.toString();
}
Also used : HashMap(java.util.HashMap) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord) IntPair(edu.stanford.nlp.util.IntPair) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 77 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalDependenciesFeatureAnnotator method getGraphFeatures.

/**
 * Derives the features of {@code word} that depend on its attachment in {@code sg}.
 *
 * <p>Depending on the word's tag and value, the returned map may contain "Case", "Person",
 * "PronType", "VerbForm", "Tense", "Reflex" and "Voice" entries. Rules are applied in a fixed
 * order, so a later rule overwrites an earlier value stored under the same key.
 *
 * @param sg the dependency graph that contains {@code word}
 * @param word the token to inspect
 * @return a new map from feature name to feature value; possibly empty
 */
private static HashMap<String, String> getGraphFeatures(SemanticGraph sg, IndexedWord word) {
    HashMap<String, String> result = new HashMap<>();
    String tag = word.tag();

    /* Case of the personal pronouns "you" and "it". */
    if (tag.equals("PRP")) {
        String form = word.value();
        if (form.equalsIgnoreCase("you") || form.equalsIgnoreCase("it")) {
            result.put("Case", pronounCase(sg, word));
        }
    }

    /* Person of the past-tense form "was"; omitted when it cannot be determined. */
    if (tag.equals("VBD")) {
        if (word.value().equalsIgnoreCase("was")) {
            String person = wasPerson(sg, word);
            if (person != null) {
                result.put("Person", person);
            }
        }
    }

    /* Relative and interrogative pronouns. */
    HashMap<String, String> pronounFeatures = getRelAndIntPronFeatures(sg, word);
    result.putAll(pronounFeatures);

    /* A VBG with a "be" auxiliary is a present participle, otherwise a gerund. */
    if (tag.equals("VBG")) {
        boolean participle = hasBeAux(sg, word);
        result.put("VerbForm", participle ? "Part" : "Ger");
        if (participle) {
            result.put("Tense", "Pres");
        }
    }

    /* "-self" pronouns are reflexive unless attached as an NP adverbial modifier. */
    if (word.value().matches(SELF_REGEX) && tag.equals("PRP")) {
        IndexedWord governor = sg.getParent(word);
        if (governor != null) {
            SemanticGraphEdge incoming = sg.getEdge(governor, word);
            boolean adverbial = incoming.getRelation() == UniversalEnglishGrammaticalRelations.NP_ADVERBIAL_MODIFIER;
            if (!adverbial) {
                result.put("Case", "Acc");
                result.put("Reflex", "Yes");
            }
        }
    }

    /* A past participle with a passive auxiliary child is in passive voice. */
    if (tag.equals("VBN") && sg.hasChildWithReln(word, UniversalEnglishGrammaticalRelations.AUX_PASSIVE_MODIFIER)) {
        result.put("Voice", "Pass");
    }
    return result;
}
Also used : HashMap(java.util.HashMap) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 78 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalDependenciesFeatureAnnotator method main.

/**
 * Command-line entry point: reads dependency graphs from a CoNLL-U file and trees from a tree
 * file in parallel, adds features to every graph and prints the result to standard output.
 *
 * <p>Arguments: {@code CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]}. With fewer than
 * two arguments a usage message is logged and the method returns. Unrecognized optional
 * arguments are ignored.
 *
 * @param args the command-line arguments
 * @throws IOException if the CoNLL-U input cannot be opened or closed
 * @throws RuntimeException if a graph has no corresponding tree, or the tree's yield size
 *     differs from the graph's size
 */
public static void main(String[] args) throws IOException {
    if (args.length < 2) {
        log.info("Usage: ");
        log.info("java ");
        log.info(UniversalDependenciesFeatureAnnotator.class.getCanonicalName());
        log.info(" CoNLL-U_file tree_file [-addUPOS -escapeParenthesis]");
        return;
    }
    String coNLLUFile = args[0];
    String treeFile = args[1];
    boolean addUPOS = false;
    boolean escapeParens = false;
    for (int i = 2; i < args.length; i++) {
        if (args[i].equals("-addUPOS")) {
            addUPOS = true;
        } else if (args[i].equals("-escapeParenthesis")) {
            escapeParens = true;
        }
    }
    UniversalDependenciesFeatureAnnotator featureAnnotator = new UniversalDependenciesFeatureAnnotator();
    CoNLLUDocumentReader depReader = new CoNLLUDocumentReader();
    CoNLLUDocumentWriter depWriter = new CoNLLUDocumentWriter();
    /* try-with-resources closes the input even when an alignment error is thrown. */
    try (Reader r = IOUtils.readerFromString(coNLLUFile)) {
        Iterator<SemanticGraph> it = depReader.getIterator(r);
        Iterator<Tree> treeIt = treebankIterator(treeFile);
        while (it.hasNext()) {
            SemanticGraph sg = it.next();
            /* A tree file that runs out early is reported as a misalignment below. */
            Tree t = treeIt.hasNext() ? treeIt.next() : null;
            if (t == null || t.yield().size() != sg.size()) {
                StringBuilder sentenceSb = new StringBuilder();
                for (IndexedWord word : sg.vertexListSorted()) {
                    sentenceSb.append(word.get(CoreAnnotations.TextAnnotation.class));
                    sentenceSb.append(' ');
                }
                /* Guard the null case so the alignment error is not replaced by a NullPointerException. */
                String treeText = (t == null) ? "<missing>" : t.pennString();
                throw new RuntimeException("CoNLL-U file and tree file are not aligned. \n" + "Sentence: " + sentenceSb + '\n' + "Tree: " + treeText);
            }
            featureAnnotator.addFeatures(sg, t, true, addUPOS);
            /* The writer's flag means "unescape", hence the negation. */
            System.out.print(depWriter.printSemanticGraph(sg, !escapeParens));
        }
    }
}
Also used : Reader(java.io.Reader) BufferedReader(java.io.BufferedReader) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Tree(edu.stanford.nlp.trees.Tree) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 79 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalDependenciesFeatureAnnotator method pronounCase.

/**
 * Decides whether the pronoun "you" or "it" is nominative or accusative.
 *
 * <p>The result is "Acc" when the incoming relation is a kind of object relation, or when it
 * is a kind of nominal modifier (or the root relation) and the pronoun has a case-marker child.
 * In every other situation, including a pronoun without a parent, the result is "Nom".
 *
 * @param sg the dependency graph to consult
 * @param word the pronoun; only its index is used to locate the vertex in {@code sg}
 * @return "Acc" or "Nom"
 */
private static String pronounCase(SemanticGraph sg, IndexedWord word) {
    /* Re-fetch the vertex by index; presumably the argument may be a different instance - confirm. */
    IndexedWord node = sg.getNodeByIndex(word.index());
    IndexedWord governor = sg.getParent(node);
    if (governor == null) {
        return "Nom";
    }
    SemanticGraphEdge incoming = sg.getEdge(governor, node);
    if (incoming == null) {
        return "Nom";
    }
    GrammaticalRelation relation = incoming.getRelation();
    /* The pronoun is an object. */
    if (UniversalEnglishGrammaticalRelations.OBJECT.isAncestor(relation)) {
        return "Acc";
    }
    /* The pronoun is the head of a prepositional phrase. */
    boolean nominalOrRoot = UniversalEnglishGrammaticalRelations.NOMINAL_MODIFIER.isAncestor(relation)
            || relation == GrammaticalRelation.ROOT;
    if (nominalOrRoot && sg.hasChildWithReln(node, UniversalEnglishGrammaticalRelations.CASE_MARKER)) {
        return "Acc";
    }
    return "Nom";
}
Also used : IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Example 80 with IndexedWord

use of edu.stanford.nlp.ling.IndexedWord in project CoreNLP by stanfordnlp.

the class UniversalDependenciesFeatureAnnotator method getRelAndIntPronFeatures.

/**
 * Computes the "PronType" feature for words whose tag starts with "W".
 *
 * <p>The value is "Rel" when the word's parent is attached to its own parent by the
 * relative-clause-modifier relation. Otherwise it is "Dem" for the word "that" (ignoring
 * letter case) and "Int" for anything else.
 *
 * @param sg the dependency graph that contains {@code word}
 * @param word the token to inspect
 * @return a new map holding a single "PronType" entry, or an empty map when the tag does not
 *     start with "W"
 */
private static HashMap<String, String> getRelAndIntPronFeatures(SemanticGraph sg, IndexedWord word) {
    HashMap<String, String> result = new HashMap<>();
    if (!word.tag().startsWith("W")) {
        return result;
    }
    IndexedWord head = sg.getParent(word);
    IndexedWord grandHead = (head == null) ? null : sg.getParent(head);
    String pronType;
    if (grandHead != null
            && sg.getEdge(grandHead, head).getRelation().equals(UniversalEnglishGrammaticalRelations.RELATIVE_CLAUSE_MODIFIER)) {
        /* The head of this word is the predicate of a relative clause. */
        pronType = "Rel";
    } else if (word.value().equalsIgnoreCase("that")) {
        pronType = "Dem";
    } else {
        pronType = "Int";
    }
    result.put("PronType", pronType);
    return result;
}
Also used : HashMap(java.util.HashMap) IndexedWord(edu.stanford.nlp.ling.IndexedWord) SemanticGraphEdge(edu.stanford.nlp.semgraph.SemanticGraphEdge)

Aggregations

IndexedWord (edu.stanford.nlp.ling.IndexedWord)204 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)55 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)53 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)41 CoreLabel (edu.stanford.nlp.ling.CoreLabel)38 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)36 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)24 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)21 ArrayList (java.util.ArrayList)16 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)10 Tree (edu.stanford.nlp.trees.Tree)10 Pair (edu.stanford.nlp.util.Pair)10 CoreMap (edu.stanford.nlp.util.CoreMap)8 IntPair (edu.stanford.nlp.util.IntPair)8 java.util (java.util)8 Collectors (java.util.stream.Collectors)8 Span (edu.stanford.nlp.ie.machinereading.structure.Span)7 Annotation (edu.stanford.nlp.pipeline.Annotation)6 edu.stanford.nlp.util (edu.stanford.nlp.util)6 Mention (edu.stanford.nlp.coref.data.Mention)5