Search in sources:

Example 71 with SemanticGraph

use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.

the class TSVUtils method parseJsonTree.

/**
   * Parse a JSON formatted tree into a SemanticGraph.
   * @param jsonString The JSON string tree to parse, e.g:
   * "[{\"\"dependent\"\": 7, \"\"dep\"\": \"\"root\"\", \"\"governorgloss\"\": \"\"root\"\", \"\"governor\"\": 0, \"\"dependentgloss\"\": \"\"sport\"\"}, {\"\"dependent\"\": 1, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 2, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"is\"\"}, {\"\"dependent\"\": 3, \"\"dep\"\": \"\"neg\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"not\"\"}, {\"\"dependent\"\": 4, \"\"dep\"\": \"\"det\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"a\"\"}, {\"\"dependent\"\": 5, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"physical\"\", \"\"governor\"\": 6, \"\"dependentgloss\"\": \"\"predominantly\"\"}, {\"\"dependent\"\": 6, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"physical\"\"}, {\"\"dependent\"\": 9, \"\"dep\"\": \"\"advmod\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"yet\"\"}, {\"\"dependent\"\": 10, \"\"dep\"\": \"\"nsubj\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"neither\"\"}, {\"\"dependent\"\": 11, \"\"dep\"\": \"\"cop\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"are\"\"}, {\"\"dependent\"\": 12, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"shooting\"\"}, {\"\"dependent\"\": 13, \"\"dep\"\": \"\"cc\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"and\"\"}, {\"\"dependent\"\": 14, \"\"dep\"\": \"\"parataxis\"\", \"\"governorgloss\"\": \"\"sport\"\", \"\"governor\"\": 7, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 14, 
\"\"dep\"\": \"\"conj:and\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"curling\"\"}, {\"\"dependent\"\": 16, \"\"dep\"\": \"\"nsubjpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"which\"\"}, {\"\"dependent\"\": 18, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"fact\"\", \"\"governor\"\": 19, \"\"dependentgloss\"\": \"\"in\"\"}, {\"\"dependent\"\": 19, \"\"dep\"\": \"\"nmod:in\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"fact\"\"}, {\"\"dependent\"\": 21, \"\"dep\"\": \"\"aux\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"has\"\"}, {\"\"dependent\"\": 22, \"\"dep\"\": \"\"auxpass\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"been\"\"}, {\"\"dependent\"\": 23, \"\"dep\"\": \"\"dep\"\", \"\"governorgloss\"\": \"\"shooting\"\", \"\"governor\"\": 12, \"\"dependentgloss\"\": \"\"nicknamed\"\"}, {\"\"dependent\"\": 25, \"\"dep\"\": \"\"dobj\"\", \"\"governorgloss\"\": \"\"nicknamed\"\", \"\"governor\"\": 23, \"\"dependentgloss\"\": \"\"chess\"\"}, {\"\"dependent\"\": 26, \"\"dep\"\": \"\"case\"\", \"\"governorgloss\"\": \"\"ice\"\", \"\"governor\"\": 27, \"\"dependentgloss\"\": \"\"on\"\"}, {\"\"dependent\"\": 27, \"\"dep\"\": \"\"nmod:on\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"ice\"\"}, {\"\"dependent\"\": 29, \"\"dep\"\": \"\"amod\"\", \"\"governorgloss\"\": \"\"chess\"\", \"\"governor\"\": 25, \"\"dependentgloss\"\": \"\"5\"\"}]");
   * @param tokens The tokens of the sentence, to form the backing labels of the tree.
   * @return A semantic graph of the sentence, according to the given tree.
   */
public static SemanticGraph parseJsonTree(String jsonString, List<CoreLabel> tokens) {
    // Read the whole array up front; try-with-resources closes the reader even
    // when the JSON is malformed and readArray() throws.
    final JsonArray array;
    try (JsonReader json = Json.createReader(new StringReader(jsonString))) {
        array = json.readArray();
    }
    SemanticGraph tree = new SemanticGraph();
    if (array == null || array.isEmpty()) {
        return tree;
    }
    // One cached vertex per token, addressed by the 1-based index used in the JSON.
    // Slot 0 corresponds to the artificial root and is never populated.
    IndexedWord[] vertices = new IndexedWord[tokens.size() + 1];
    // Add edges
    for (int i = 0; i < array.size(); i++) {
        JsonObject entry = array.getJsonObject(i);
        // Parse row
        int dependentIndex = entry.getInt("dependent");
        // Validate BEFORE touching the cache: an index outside 1..tokens.size()
        // would otherwise fail with an index-out-of-bounds exception rather than
        // being treated as a size mismatch.
        if (dependentIndex < 1 || dependentIndex > tokens.size()) {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (vertices[dependentIndex] == null) {
            vertices[dependentIndex] = new IndexedWord(tokens.get(dependentIndex - 1));
        }
        IndexedWord dependent = vertices[dependentIndex];
        int governorIndex = entry.getInt("governor");
        // 0 is legal here (it marks the root); anything else must name a real token.
        if (governorIndex < 0 || governorIndex > tokens.size()) {
            // Bizarre mismatch in sizes; the malt parser seems to do this often
            return new SemanticGraph();
        }
        if (governorIndex > 0 && vertices[governorIndex] == null) {
            vertices[governorIndex] = new IndexedWord(tokens.get(governorIndex - 1));
        }
        // Stays null when governorIndex == 0; only used on the non-root branch below.
        IndexedWord governor = vertices[governorIndex];
        String relation = entry.getString("dep");
        // Process row
        if (governorIndex == 0) {
            tree.addRoot(dependent);
        } else {
            tree.addVertex(dependent);
            if (!tree.containsVertex(governor)) {
                tree.addVertex(governor);
            }
            // "ref" rows still contribute their vertices, but no edge is added for them.
            if (!"ref".equals(relation)) {
                tree.addEdge(governor, dependent, GrammaticalRelation.valueOf(Language.English, relation), Double.NEGATIVE_INFINITY, false);
            }
        }
    }
    return tree;
}
Also used : StringReader(java.io.StringReader) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Example 72 with SemanticGraph

use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.

the class UniversalDependenciesConverter method main.

/**
 * Command-line entry point: reads dependency input, optionally upgrades the
 * representation, and prints every resulting graph to standard output using a
 * {@code CoNLLUDocumentWriter}.
 *
 * <p>Input is either a file of PTB-formatted constituency trees or a file of
 * basic dependency trees in CoNLL-U format. The output is the English basic,
 * enhanced, or enhanced++ Universal Dependencies representation.
 *
 * <p>Command-line options:<br>
 * {@code -treeFile}: File with PTB-formatted constituency trees<br>
 * {@code -conlluFile}: File with basic dependency trees in CoNLL-U format<br>
 * {@code -outputRepresentation}: "basic" (default), "enhanced", or "enhanced++"
 *
 * <p>When both input options are present, {@code -treeFile} wins. When neither
 * is present, a usage message is written to standard error and nothing else happens.
 *
 * @param args the command-line arguments, turned into properties via
 *             {@code StringUtils.argsToProperties}
 */
public static void main(String[] args) {
    Properties options = StringUtils.argsToProperties(args);
    String treePath = options.getProperty("treeFile");
    String conlluPath = options.getProperty("conlluFile");
    String representation = options.getProperty("outputRepresentation", "basic");
    boolean readingTrees = treePath != null;

    // Nothing to read: explain the expected invocation and stop.
    if (!readingTrees && conlluPath == null) {
        System.err.println("No input file specified!");
        System.err.println("");
        System.err.printf("Usage: java %s [-treeFile trees.tree | -conlluFile deptrees.conllu]" + " [-outputRepresentation basic|enhanced|enhanced++ (default: basic)]%n", UniversalDependenciesConverter.class.getCanonicalName());
        return;
    }

    Iterator<SemanticGraph> graphs;
    if (readingTrees) {
        MemoryTreebank treebank = new MemoryTreebank(new NPTmpRetainingTreeNormalizer(0, false, 1, false));
        treebank.loadPath(treePath);
        graphs = new TreeToSemanticGraphIterator(treebank.iterator());
    } else {
        CoNLLUDocumentReader conlluReader = new CoNLLUDocumentReader();
        try {
            graphs = conlluReader.getIterator(IOUtils.readerFromString(conlluPath));
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }

    // The requested representation does not change between sentences.
    boolean wantEnhanced = representation.equalsIgnoreCase("enhanced");
    boolean wantEnhancedPlusPlus = !wantEnhanced && representation.equalsIgnoreCase("enhanced++");

    CoNLLUDocumentWriter writer = new CoNLLUDocumentWriter();
    while (graphs.hasNext()) {
        SemanticGraph graph = graphs.next();
        if (readingTrees) {
            // Add UPOS tags, taken from the tree the current graph was derived from.
            Tree sourceTree = ((TreeToSemanticGraphIterator) graphs).getCurrentTree();
            List<Label> uposLabels = UniversalPOSMapper.mapTree(sourceTree).preTerminalYield();
            for (IndexedWord word : graph.vertexListSorted()) {
                // Token indices are 1-based; the label list is 0-based.
                String uposTag = uposLabels.get(word.index() - 1).value();
                word.set(CoreAnnotations.CoarseTagAnnotation.class, uposTag);
            }
        } else {
            addLemmata(graph);
            if (USE_NAME) {
                addNERTags(graph);
            }
        }

        if (wantEnhanced) {
            graph = convertBasicToEnhanced(graph);
        } else if (wantEnhancedPlusPlus) {
            graph = convertBasicToEnhancedPlusPlus(graph);
        }
        System.out.print(writer.printSemanticGraph(graph));
    }
}
Also used : Properties(java.util.Properties) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 73 with SemanticGraph

use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.

the class UniversalDependenciesConverter method convertTreeToBasic.

/**
 * Builds a basic-mode dependency graph (no extra dependencies) from a tree.
 *
 * <p>Lemmata and NER tags are added to the tree before conversion. After
 * conversion, lemmata are added to the graph as well; NER tags are added to
 * the graph only when {@code USE_NAME} is set.
 *
 * @param tree the tree to convert; it is annotated in place before conversion
 * @return the graph produced by {@code SemanticGraphFactory.makeFromTree}
 */
private static SemanticGraph convertTreeToBasic(Tree tree) {
    addLemmata(tree);
    addNERTags(tree);

    SemanticGraph basicGraph = SemanticGraphFactory.makeFromTree(
            tree,
            SemanticGraphFactory.Mode.BASIC,
            GrammaticalStructure.Extras.NONE,
            null,
            false,
            true);

    addLemmata(basicGraph);
    if (!USE_NAME) {
        return basicGraph;
    }
    addNERTags(basicGraph);
    return basicGraph;
}
Also used : SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph)

Example 74 with SemanticGraph

use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.

the class TSVSentenceIterator method toCoreMap.

/**
 * Assembles a sentence-level {@code CoreMap} from a row of column values.
 *
 * <p>The {@code fields} and {@code entries} lists are paired up with
 * {@code Iterables.zip}, and every value goes through {@code unescapeSQL}
 * before use. The row is scanned several times, in this order:
 * <ol>
 *   <li>token-level columns (words, lemmas, POS tags, NER tags), which create
 *       the token list on first use;</li>
 *   <li>sentence/document columns (ids, sentence index, gloss);</li>
 *   <li>document character offsets, which overwrite the offsets derived from
 *       the words column;</li>
 *   <li>dependency columns, each parsed with {@code TSVUtils.parseJsonTree}.</li>
 * </ol>
 * Steps 3 and 4 only run when at least one token-level column was present.
 *
 * @param fields  the column descriptors of the row
 * @param entries the raw column values, paired with {@code fields}
 * @return the populated map
 */
public static CoreMap toCoreMap(List<SentenceField> fields, List<String> entries) {
    CoreMap sentence = new ArrayCoreMap(fields.size());
    // Stays null until the first token-level column shows up.
    List<CoreLabel> tokens = null;

    // Pass 1: token-level columns.
    for (Pair<SentenceField, String> column : Iterables.zip(fields, entries)) {
        String text = unescapeSQL(column.second);
        switch (column.first) {
            case WORDS: {
                List<String> words = TSVUtils.parseArray(text);
                tokens = blankTokensUnlessPresent(tokens, words.size());
                // Offsets assume the words are separated by exactly one character.
                int offset = 0;
                for (int k = 0; k < words.size(); k++) {
                    CoreLabel token = tokens.get(k);
                    String word = words.get(k);
                    token.setValue(word);
                    token.setWord(word);
                    token.setBeginPosition(offset);
                    token.setEndPosition(offset + word.length());
                    offset += word.length() + 1;
                }
                break;
            }
            case LEMMAS: {
                List<String> lemmas = TSVUtils.parseArray(text);
                tokens = blankTokensUnlessPresent(tokens, lemmas.size());
                for (int k = 0; k < lemmas.size(); k++) {
                    tokens.get(k).setLemma(lemmas.get(k));
                }
                break;
            }
            case POS_TAGS: {
                List<String> tags = TSVUtils.parseArray(text);
                tokens = blankTokensUnlessPresent(tokens, tags.size());
                for (int k = 0; k < tags.size(); k++) {
                    tokens.get(k).setTag(tags.get(k));
                }
                break;
            }
            case NER_TAGS: {
                List<String> nerTags = TSVUtils.parseArray(text);
                tokens = blankTokensUnlessPresent(tokens, nerTags.size());
                for (int k = 0; k < nerTags.size(); k++) {
                    tokens.get(k).setNER(nerTags.get(k));
                }
                break;
            }
            default:
                // Not a token-level column.
                break;
        }
    }

    // Pass 2: sentence- and document-level columns.
    Optional<String> docId = Optional.empty();
    Optional<String> sentenceId = Optional.empty();
    Optional<Integer> sentenceIndex = Optional.empty();
    for (Pair<SentenceField, String> column : Iterables.zip(fields, entries)) {
        String text = unescapeSQL(column.second);
        switch (column.first) {
            case ID:
                sentenceId = Optional.of(text);
                break;
            case DOC_ID:
                docId = Optional.of(text);
                break;
            case SENTENCE_INDEX:
                sentenceIndex = Optional.of(Integer.parseInt(text));
                break;
            case GLOSS:
                // Turn the two-character escapes back into real newlines and tabs.
                text = text.replace("\\n", "\n").replace("\\t", "\t");
                sentence.set(CoreAnnotations.TextAnnotation.class, text);
                break;
            default:
                // Not a sentence/document column.
                break;
        }
    }
    sentence.set(CoreAnnotations.SentenceIDAnnotation.class, sentenceId.orElse("-1"));
    sentence.set(CoreAnnotations.DocIDAnnotation.class, docId.orElse("???"));
    sentence.set(CoreAnnotations.SentenceIndexAnnotation.class, sentenceIndex.orElse(0));

    // Everything below decorates or depends on tokens.
    if (tokens == null) {
        return sentence;
    }

    // Pass 3: document character offsets.
    for (Pair<SentenceField, String> column : Iterables.zip(fields, entries)) {
        String text = unescapeSQL(column.second);
        switch (column.first) {
            case DOC_CHAR_BEGIN: {
                List<String> begins = TSVUtils.parseArray(text);
                for (int k = 0; k < tokens.size(); k++) {
                    tokens.get(k).setBeginPosition(Integer.parseInt(begins.get(k)));
                }
                break;
            }
            case DOC_CHAR_END: {
                List<String> ends = TSVUtils.parseArray(text);
                for (int k = 0; k < tokens.size(); k++) {
                    tokens.get(k).setEndPosition(Integer.parseInt(ends.get(k)));
                }
                break;
            }
            default:
                // Not an offset column.
                break;
        }
    }

    // Per-token bookkeeping. Note the token-level sentence index falls back to -1,
    // whereas the sentence-level one above falls back to 0.
    String tokenDocId = docId.orElse("???");
    Integer tokenSentenceIndex = sentenceIndex.orElse(-1);
    int position = 0;
    for (CoreLabel token : tokens) {
        token.set(CoreAnnotations.DocIDAnnotation.class, tokenDocId);
        token.set(CoreAnnotations.SentenceIndexAnnotation.class, tokenSentenceIndex);
        token.set(CoreAnnotations.IndexAnnotation.class, position + 1);
        token.set(CoreAnnotations.TokenBeginAnnotation.class, position);
        token.set(CoreAnnotations.TokenEndAnnotation.class, position + 1);
        position++;
    }

    // Pass 4: dependency trees, which need the finished tokens as backing labels.
    sentence.set(CoreAnnotations.TokensAnnotation.class, tokens);
    sentence.set(CoreAnnotations.TokenBeginAnnotation.class, 0);
    sentence.set(CoreAnnotations.TokenEndAnnotation.class, tokens.size());
    for (Pair<SentenceField, String> column : Iterables.zip(fields, entries)) {
        String text = unescapeSQL(column.second);
        switch (column.first) {
            case DEPENDENCIES_BASIC: {
                SemanticGraph basic = TSVUtils.parseJsonTree(text, tokens);
                sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, basic);
                break;
            }
            case DEPENDENCIES_COLLAPSED: {
                SemanticGraph collapsed = TSVUtils.parseJsonTree(text, tokens);
                sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, collapsed);
                break;
            }
            case DEPENDENCIES_COLLAPSED_CC: {
                SemanticGraph collapsedCc = TSVUtils.parseJsonTree(text, tokens);
                sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, collapsedCc);
                break;
            }
            case DEPENDENCIES_ALTERNATE: {
                SemanticGraph alternate = TSVUtils.parseJsonTree(text, tokens);
                sentence.set(SemanticGraphCoreAnnotations.AlternativeDependenciesAnnotation.class, alternate);
                break;
            }
            default:
                // Not a dependency column.
                break;
        }
    }
    return sentence;
}

/**
 * Returns {@code existing} untouched when it is non-null; otherwise creates a
 * list holding {@code count} freshly constructed, empty labels.
 */
private static List<CoreLabel> blankTokensUnlessPresent(List<CoreLabel> existing, int count) {
    if (existing != null) {
        return existing;
    }
    List<CoreLabel> blanks = new ArrayList<>(count);
    for (int k = 0; k < count; k++) {
        blanks.add(new CoreLabel());
    }
    return blanks;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) List(java.util.List) ArrayList(java.util.ArrayList)

Example 75 with SemanticGraph

use of edu.stanford.nlp.semgraph.SemanticGraph in project CoreNLP by stanfordnlp.

the class IETestUtils method parseCoNLL.

/**
 * Builds a SemanticGraph, plus the flat token list, from a CoNLL-style string.
 * Handy in tests because no model has to be loaded, which also keeps the tests
 * independent of model changes.
 *
 * <p>Blank lines are skipped. Every other line is split on whitespace and read
 * by column position: 0 = token index, 1 = word, 2 = index of the head
 * ({@code 0} marks a root), 3 = relation name, and, when present, 4 = tag,
 * 5 = NER label, 6 = lemma.
 *
 * @param conll The CoNLL format for the tree.
 * @return A semantic graph, as well as the flat tokens of the sentence.
 */
public static Pair<SemanticGraph, List<CoreLabel>> parseCoNLL(String conll) {
    // Split each non-blank line into columns once; both passes below reuse the result.
    List<String[]> rows = new ArrayList<>();
    for (String rawLine : conll.split("\n")) {
        String trimmed = rawLine.trim();
        if (!trimmed.isEmpty()) {
            rows.add(trimmed.split("\\s+"));
        }
    }

    // Pass 1: create one token and one graph vertex per row.
    List<CoreLabel> tokens = new ArrayList<>();
    SemanticGraph graph = new SemanticGraph();
    for (String[] columns : rows) {
        int tokenIndex = Integer.parseInt(columns[0]);
        CoreLabel token = mkWord(columns[1], tokenIndex);
        tokens.add(token);

        boolean isRoot = columns[2].equals("0");
        IndexedWord vertex = new IndexedWord(token);
        if (isRoot) {
            graph.addRoot(vertex);
        } else {
            graph.addVertex(vertex);
        }

        // Optional trailing columns; each one can only exist if the previous one does.
        if (columns.length > 4) {
            token.setTag(columns[4]);
            if (columns.length > 5) {
                token.setNER(columns[5]);
                if (columns.length > 6) {
                    token.setLemma(columns[6]);
                }
            }
        }
    }

    // Pass 2: connect every non-root row to its head. The head is looked up by
    // its position in the token list (head index minus one).
    for (int row = 0; row < rows.size(); row++) {
        String[] columns = rows.get(row);
        int head = Integer.parseInt(columns[2]);
        String relationName = columns[3];
        if (head <= 0) {
            continue;
        }
        IndexedWord governor = new IndexedWord(tokens.get(head - 1));
        IndexedWord dependent = new IndexedWord(tokens.get(row));
        GrammaticalRelation relation = new GrammaticalRelation(Language.UniversalEnglish, relationName, null, null);
        graph.addEdge(governor, dependent, relation, 1.0, false);
    }
    return Pair.makePair(graph, tokens);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ArrayList(java.util.ArrayList) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) GrammaticalRelation(edu.stanford.nlp.trees.GrammaticalRelation) IndexedWord(edu.stanford.nlp.ling.IndexedWord)

Aggregations

SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)126 IndexedWord (edu.stanford.nlp.ling.IndexedWord)57 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)53 CoreLabel (edu.stanford.nlp.ling.CoreLabel)51 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)47 SemanticGraphEdge (edu.stanford.nlp.semgraph.SemanticGraphEdge)24 Tree (edu.stanford.nlp.trees.Tree)20 CoreMap (edu.stanford.nlp.util.CoreMap)19 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)18 SemgrexMatcher (edu.stanford.nlp.semgraph.semgrex.SemgrexMatcher)16 GrammaticalRelation (edu.stanford.nlp.trees.GrammaticalRelation)16 Annotation (edu.stanford.nlp.pipeline.Annotation)14 SemgrexPattern (edu.stanford.nlp.semgraph.semgrex.SemgrexPattern)12 ArrayList (java.util.ArrayList)12 Mention (edu.stanford.nlp.coref.data.Mention)11 java.util (java.util)11 edu.stanford.nlp.util (edu.stanford.nlp.util)10 Properties (java.util.Properties)9 Collectors (java.util.stream.Collectors)9 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)8