use of edu.stanford.nlp.trees.ud.CoNLLUDocumentReader in project CoreNLP by stanfordnlp.
the class SemgrexPattern method main.
/**
* Prints out all matches of a semgrex pattern on a file of dependencies.
* <br>
* Usage:<br>
* java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
* <br>
* See the help() function for a list of possible arguments to provide.
*/
public static void main(String[] args) throws IOException {
Map<String, Integer> flagMap = Generics.newHashMap();
flagMap.put(PATTERN, 1);
flagMap.put(TREE_FILE, 1);
flagMap.put(MODE, 1);
flagMap.put(EXTRAS, 1);
flagMap.put(CONLLU_FILE, 1);
flagMap.put(OUTPUT_FORMAT_OPTION, 1);
Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);
args = argsMap.get(null);
// TODO: allow patterns to be extracted from a file
if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
help();
System.exit(2);
}
SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);
String modeString = DEFAULT_MODE;
if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
modeString = argsMap.get(MODE)[0].toUpperCase();
}
SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);
String outputFormatString = DEFAULT_OUTPUT_FORMAT;
if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase();
}
OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);
boolean useExtras = true;
if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
useExtras = Boolean.valueOf(argsMap.get(EXTRAS)[0]);
}
List<SemanticGraph> graphs = Generics.newArrayList();
// TODO: allow other sources of graphs, such as dependency files
if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
for (String treeFile : argsMap.get(TREE_FILE)) {
log.info("Loading file " + treeFile);
MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
treebank.loadPath(treeFile);
for (Tree tree : treebank) {
// TODO: allow other languages... this defaults to English
SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE);
graphs.add(graph);
}
}
}
if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
for (String conlluFile : argsMap.get(CONLLU_FILE)) {
log.info("Loading file " + conlluFile);
Iterator<SemanticGraph> it = reader.getIterator(IOUtils.readerFromString(conlluFile));
while (it.hasNext()) {
SemanticGraph graph = it.next();
graphs.add(graph);
}
}
}
for (SemanticGraph graph : graphs) {
SemgrexMatcher matcher = semgrex.matcher(graph);
if (!(matcher.find())) {
continue;
}
if (outputFormat == OutputFormat.LIST) {
log.info("Matched graph:");
log.info(graph.toString(SemanticGraph.OutputFormat.LIST));
boolean found = true;
while (found) {
log.info("Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
List<String> nodeNames = Generics.newArrayList();
nodeNames.addAll(matcher.getNodeNames());
Collections.sort(nodeNames);
for (String name : nodeNames) {
log.info(" " + name + ": " + matcher.getNode(name).value() + "-" + matcher.getNode(name).index());
}
log.info();
found = matcher.find();
}
} else if (outputFormat == OutputFormat.OFFSET) {
if (graph.vertexListSorted().isEmpty()) {
continue;
}
System.out.printf("+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class), argsMap.get(CONLLU_FILE)[0]);
}
}
}
Aggregations