Search in sources :

Example 1 with TreeNormalizer

use of edu.stanford.nlp.trees.TreeNormalizer in project CoreNLP by stanfordnlp.

The class SemgrexPattern, method main.

/**
   * Prints out all matches of a semgrex pattern on a file of dependencies.
   * <br>
   * Usage:<br>
   * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
   * <br>
   * See the help() function for a list of possible arguments to provide.
   * <br>
   * Graphs are built from every file given with the tree-file flag and read
   * from every file given with the CoNLL-U flag; the pattern is then run
   * against each graph in load order.  In the offset output format, each
   * matching graph is reported together with the file it was loaded from.
   *
   * @param args command line flags and their values
   * @throws IOException if one of the CoNLL-U files cannot be opened or closed
   */
public static void main(String[] args) throws IOException {
    // Every recognized flag takes exactly one value.
    Map<String, Integer> flagMap = Generics.newHashMap();
    flagMap.put(PATTERN, 1);
    flagMap.put(TREE_FILE, 1);
    flagMap.put(MODE, 1);
    flagMap.put(EXTRAS, 1);
    flagMap.put(CONLLU_FILE, 1);
    flagMap.put(OUTPUT_FORMAT_OPTION, 1);
    Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
    // TODO: allow patterns to be extracted from a file
    if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
        help();
        System.exit(2);
    }
    SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);
    // Enum names are upper-cased with Locale.ROOT so that lookups do not
    // depend on the default locale (e.g. Turkish maps 'i' to a dotted 'İ').
    String modeString = DEFAULT_MODE;
    if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
        modeString = argsMap.get(MODE)[0].toUpperCase(java.util.Locale.ROOT);
    }
    SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);
    String outputFormatString = DEFAULT_OUTPUT_FORMAT;
    if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
        outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase(java.util.Locale.ROOT);
    }
    OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);
    boolean useExtras = true;
    if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
        useExtras = Boolean.parseBoolean(argsMap.get(EXTRAS)[0]);
    }
    List<SemanticGraph> graphs = Generics.newArrayList();
    // graphSources.get(i) is the name of the file graphs.get(i) was loaded from.
    List<String> graphSources = Generics.newArrayList();
    // TODO: allow other sources of graphs, such as dependency files
    if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
        for (String treeFile : argsMap.get(TREE_FILE)) {
            log.info("Loading file " + treeFile);
            MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
            treebank.loadPath(treeFile);
            for (Tree tree : treebank) {
                // TODO: allow other languages... this defaults to English
                SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE);
                graphs.add(graph);
                graphSources.add(treeFile);
            }
        }
    }
    if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
        CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
        for (String conlluFile : argsMap.get(CONLLU_FILE)) {
            log.info("Loading file " + conlluFile);
            // The iterator is drained inside the try block, so the underlying
            // reader can be closed as soon as the file has been consumed.
            try (java.io.Reader in = IOUtils.readerFromString(conlluFile)) {
                Iterator<SemanticGraph> it = reader.getIterator(in);
                while (it.hasNext()) {
                    graphs.add(it.next());
                    graphSources.add(conlluFile);
                }
            }
        }
    }
    for (int i = 0; i < graphs.size(); i++) {
        SemanticGraph graph = graphs.get(i);
        SemgrexMatcher matcher = semgrex.matcher(graph);
        if (!(matcher.find())) {
            continue;
        }
        if (outputFormat == OutputFormat.LIST) {
            log.info("Matched graph:");
            log.info(graph.toString(SemanticGraph.OutputFormat.LIST));
            boolean found = true;
            while (found) {
                log.info("Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
                // Sort the named nodes so the report order is stable.
                List<String> nodeNames = Generics.newArrayList();
                nodeNames.addAll(matcher.getNodeNames());
                Collections.sort(nodeNames);
                for (String name : nodeNames) {
                    log.info("  " + name + ": " + matcher.getNode(name).value() + "-" + matcher.getNode(name).index());
                }
                log.info();
                found = matcher.find();
            }
        } else if (outputFormat == OutputFormat.OFFSET) {
            if (graph.vertexListSorted().isEmpty()) {
                continue;
            }
            // Report the file this particular graph came from rather than
            // unconditionally the first CoNLL-U file, which was wrong for
            // multiple inputs and threw when only tree files were given.
            System.out.printf("+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class), graphSources.get(i));
        }
    }
}
Also used : SemanticGraphFactory(edu.stanford.nlp.semgraph.SemanticGraphFactory) CoNLLUDocumentReader(edu.stanford.nlp.trees.ud.CoNLLUDocumentReader) TreeNormalizer(edu.stanford.nlp.trees.TreeNormalizer) SemanticGraph(edu.stanford.nlp.semgraph.SemanticGraph) Tree(edu.stanford.nlp.trees.Tree) MemoryTreebank(edu.stanford.nlp.trees.MemoryTreebank)

Example 2 with TreeNormalizer

use of edu.stanford.nlp.trees.TreeNormalizer in project CoreNLP by stanfordnlp.

The class TaggedFileRecord, method createRecord.

/**
 * Builds a TaggedFileRecord from a comma-separated description.
 * <br>
 * The last comma-separated piece is the file; every piece before it must be
 * a <code>key=value</code> option, with keys matched case-insensitively.
 * A description that splits into a single piece is used as-is as the file
 * of a text-format record.  Encoding and tag separator start from the
 * values derived from <code>config</code> and may be overridden by options.
 *
 * @param config properties used to look up the default encoding and tag separator
 * @param description comma-separated options followed by the file
 * @return the record described by <code>description</code>
 * @throws IllegalArgumentException if an option does not contain an '=' or
 *         uses an unknown key
 */
public static TaggedFileRecord createRecord(Properties config, String description) {
    final String[] pieces = description.split(",");
    if (pieces.length == 1) {
        // No options at all: the whole description is the file.
        return new TaggedFileRecord(description, Format.TEXT, getEncoding(config), getTagSeparator(config), null, null, null, null, null, null, null);
    }

    // Everything but the final piece is an option; the final piece is the file.
    final int optionCount = pieces.length - 1;
    String[] options = new String[optionCount];
    System.arraycopy(pieces, 0, options, 0, optionCount);
    String file = pieces[optionCount];

    // Defaults, possibly overridden by the options below.
    Format format = Format.TEXT;
    String encoding = getEncoding(config);
    String tagSeparator = getTagSeparator(config);
    TreeTransformer treeTransformer = null;
    TreeNormalizer treeNormalizer = null;
    TreeReaderFactory trf = null;
    NumberRangesFileFilter treeRange = null;
    Predicate<Tree> treeFilter = null;
    Integer wordColumn = null;
    Integer tagColumn = null;

    for (String option : options) {
        // Split on the first '=' only, so values may themselves contain '='.
        String[] keyValue = option.split("=", 2);
        if (keyValue.length != 2) {
            throw new IllegalArgumentException("TaggedFileRecord argument " + option + " has an unexpected number of =s");
        }
        final String key = keyValue[0];
        final String value = keyValue[1];

        if (key.equalsIgnoreCase(FORMAT)) {
            format = Format.valueOf(value);
        } else if (key.equalsIgnoreCase(ENCODING)) {
            encoding = value;
        } else if (key.equalsIgnoreCase(TAG_SEPARATOR)) {
            tagSeparator = value;
        } else if (key.equalsIgnoreCase(TREE_TRANSFORMER)) {
            treeTransformer = ReflectionLoading.loadByReflection(value);
        } else if (key.equalsIgnoreCase(TREE_NORMALIZER)) {
            treeNormalizer = ReflectionLoading.loadByReflection(value);
        } else if (key.equalsIgnoreCase(TREE_READER)) {
            trf = ReflectionLoading.loadByReflection(value);
        } else if (key.equalsIgnoreCase(TREE_RANGE)) {
            // Commas already separate options, so ranges are written with ':'
            // and converted to ',' before being handed to the filter.
            treeRange = new NumberRangesFileFilter(value.replace(':', ','), true);
        } else if (key.equalsIgnoreCase(TREE_FILTER)) {
            treeFilter = ReflectionLoading.loadByReflection(value);
        } else if (key.equalsIgnoreCase(WORD_COLUMN)) {
            wordColumn = Integer.valueOf(value);
        } else if (key.equalsIgnoreCase(TAG_COLUMN)) {
            tagColumn = Integer.valueOf(value);
        } else {
            throw new IllegalArgumentException("TaggedFileRecord argument " + key + " is unknown");
        }
    }

    return new TaggedFileRecord(file, format, encoding, tagSeparator, treeTransformer, treeNormalizer, trf, treeRange, treeFilter, wordColumn, tagColumn);
}
Also used : TreeNormalizer(edu.stanford.nlp.trees.TreeNormalizer) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer)

Aggregations

Tree (edu.stanford.nlp.trees.Tree)2 TreeNormalizer (edu.stanford.nlp.trees.TreeNormalizer)2 NumberRangesFileFilter (edu.stanford.nlp.io.NumberRangesFileFilter)1 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)1 SemanticGraphFactory (edu.stanford.nlp.semgraph.SemanticGraphFactory)1 MemoryTreebank (edu.stanford.nlp.trees.MemoryTreebank)1 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)1 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)1 CoNLLUDocumentReader (edu.stanford.nlp.trees.ud.CoNLLUDocumentReader)1