Examples with CoNLLUDocumentReader - edu.stanford.nlp.trees.ud.CoNLLUDocumentReader

Example 1 with CoNLLUDocumentReader

Use of edu.stanford.nlp.trees.ud.CoNLLUDocumentReader in the CoreNLP project by stanfordnlp.

The main method of the class SemgrexPattern.

/**
   * Prints out all matches of a semgrex pattern on a file of dependencies.
   * <br>
   * Usage:<br>
   * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
   * <br>
   * See the help() function for a list of possible arguments to provide.
   * <br>
   * Graphs are collected from the files given with the tree file option
   * (converted from trees) and from the files given with the CoNLL-U file
   * option, in that order, and the pattern is then run against each graph.
   * If no pattern is supplied, help() is printed and the JVM exits with
   * status 2.
   *
   * @param args command line flags, parsed with StringUtils.argsToMap
   * @throws IOException if an input file cannot be opened or read
   */
public static void main(String[] args) throws IOException {
    // Every recognized flag takes exactly one argument.
    Map<String, Integer> flagMap = Generics.newHashMap();
    flagMap.put(PATTERN, 1);
    flagMap.put(TREE_FILE, 1);
    flagMap.put(MODE, 1);
    flagMap.put(EXTRAS, 1);
    flagMap.put(CONLLU_FILE, 1);
    flagMap.put(OUTPUT_FORMAT_OPTION, 1);
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    // TODO: allow patterns to be extracted from a file
    if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
        help();
        System.exit(2);
    }
    SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);

    // Enum names are upper-cased with Locale.ROOT so that the lookup does not
    // depend on the default locale (e.g. Turkish maps "i" to a dotted capital).
    String modeString = DEFAULT_MODE;
    if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
        modeString = argsMap.get(MODE)[0].toUpperCase(java.util.Locale.ROOT);
    }
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);
    String outputFormatString = DEFAULT_OUTPUT_FORMAT;
    if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
        outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase(java.util.Locale.ROOT);
    }
    OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);
    boolean useExtras = true;
    if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
        useExtras = Boolean.parseBoolean(argsMap.get(EXTRAS)[0]);
    }

    List<SemanticGraph> graphs = Generics.newArrayList();
    // Parallel to graphs: graphSources.get(i) is the file graphs.get(i) was loaded from.
    List<String> graphSources = Generics.newArrayList();
    // TODO: allow other sources of graphs, such as dependency files
    if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
        for (String treeFile : argsMap.get(TREE_FILE)) {
            log.info("Loading file " + treeFile);
            MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
            treebank.loadPath(treeFile);
            for (Tree tree : treebank) {
                // TODO: allow other languages... this defaults to English
                SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE);
                graphs.add(graph);
                graphSources.add(treeFile);
            }
        }
    }
    if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
        CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
        for (String conlluFile : argsMap.get(CONLLU_FILE)) {
            log.info("Loading file " + conlluFile);
            // The iterator is fully drained inside the try block, so the
            // underlying reader can be closed as soon as the loop finishes.
            try (java.io.BufferedReader input = IOUtils.readerFromString(conlluFile)) {
                Iterator<SemanticGraph> it = reader.getIterator(input);
                while (it.hasNext()) {
                    graphs.add(it.next());
                    graphSources.add(conlluFile);
                }
            }
        }
    }

    for (int i = 0; i < graphs.size(); i++) {
        SemanticGraph graph = graphs.get(i);
        SemgrexMatcher matcher = semgrex.matcher(graph);
        if (!(matcher.find())) {
            continue;
        }
        if (outputFormat == OutputFormat.LIST) {
            log.info("Matched graph:");
            log.info(graph.toString(SemanticGraph.OutputFormat.LIST));
            boolean found = true;
            while (found) {
                log.info("Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
                // Sort the named nodes so the report order is stable.
                List<String> nodeNames = Generics.newArrayList();
                nodeNames.addAll(matcher.getNodeNames());
                Collections.sort(nodeNames);
                for (String name : nodeNames) {
                    log.info("  " + name + ": " + matcher.getNode(name).value() + "-" + matcher.getNode(name).index());
                }
                log.info();
                found = matcher.find();
            }
        } else if (outputFormat == OutputFormat.OFFSET) {
            if (graph.vertexListSorted().isEmpty()) {
                continue;
            }
            // Report the line number annotation of the first vertex together
            // with the file this particular graph came from. Previously the
            // first CoNLL-U file name was always used, which threw a
            // NullPointerException when only tree files were given.
            System.out.printf("+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class), graphSources.get(i));
        }
    }
}

Also used : SemanticGraphFactory(edu.stanford.nlp.semgraph.SemanticGraphFactory) CoNLLUDocumentReader(edu.stanford.nlp.trees.ud.CoNLLUDocumentReader) TreeNormalizer(edu.stanford.nlp.trees.TreeNormalizer) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Tree(edu.stanford.nlp.trees.Tree) MemoryTreebank(edu.stanford.nlp.trees.MemoryTreebank)

Aggregations

SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)1 SemanticGraphFactory (edu.stanford.nlp.semgraph.SemanticGraphFactory)1 MemoryTreebank (edu.stanford.nlp.trees.MemoryTreebank)1 Tree (edu.stanford.nlp.trees.Tree)1 TreeNormalizer (edu.stanford.nlp.trees.TreeNormalizer)1 CoNLLUDocumentReader (edu.stanford.nlp.trees.ud.CoNLLUDocumentReader)1