Use of edu.stanford.nlp.trees.TreeNormalizer in project CoreNLP by stanfordnlp.
Class SemgrexPattern, method main.
/**
 * Prints out all matches of a semgrex pattern on a file of dependencies.
 * <br>
 * Usage:<br>
 * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
 * <br>
 * See the help() function for a list of possible arguments to provide.
 * <br>
 * Graphs are built from the files given with the tree file flag and read from
 * the files given with the CoNLL-U file flag.  Each graph is remembered
 * together with the name of the file it came from, so the OFFSET output
 * format reports the file the matching graph was actually loaded from.
 *
 * @param args command line flags; if no pattern is supplied, help() is called
 *             and the JVM exits with status 2
 * @throws IOException propagated from opening, reading or closing an input file
 */
public static void main(String[] args) throws IOException {
  // Each known flag is registered with the value 1 (presumably the number of
  // arguments it consumes - see StringUtils.argsToMap).
  Map<String, Integer> flagMap = Generics.newHashMap();
  flagMap.put(PATTERN, 1);
  flagMap.put(TREE_FILE, 1);
  flagMap.put(MODE, 1);
  flagMap.put(EXTRAS, 1);
  flagMap.put(CONLLU_FILE, 1);
  flagMap.put(OUTPUT_FORMAT_OPTION, 1);
  Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);

  // TODO: allow patterns to be extracted from a file
  if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
    help();
    System.exit(2);
  }
  SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);

  // Locale.ROOT keeps the upper-casing independent of the default locale, so
  // that e.g. "list" does not become "LİST" under a Turkish locale and then
  // fail the enum lookup.
  String modeString = DEFAULT_MODE;
  if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
    modeString = argsMap.get(MODE)[0].toUpperCase(java.util.Locale.ROOT);
  }
  SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);

  String outputFormatString = DEFAULT_OUTPUT_FORMAT;
  if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
    outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase(java.util.Locale.ROOT);
  }
  OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);

  boolean useExtras = true;
  if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
    useExtras = Boolean.parseBoolean(argsMap.get(EXTRAS)[0]);
  }

  // graphs and graphSources are parallel lists: graphSources.get(i) is the
  // name of the file that graphs.get(i) was loaded from.
  List<SemanticGraph> graphs = Generics.newArrayList();
  List<String> graphSources = Generics.newArrayList();

  // TODO: allow other sources of graphs, such as dependency files
  if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
    for (String treeFile : argsMap.get(TREE_FILE)) {
      log.info("Loading file " + treeFile);
      MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
      treebank.loadPath(treeFile);
      for (Tree tree : treebank) {
        // TODO: allow other languages... this defaults to English
        SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE);
        graphs.add(graph);
        graphSources.add(treeFile);
      }
    }
  }

  if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    for (String conlluFile : argsMap.get(CONLLU_FILE)) {
      log.info("Loading file " + conlluFile);
      // The iterator is drained inside the try block so the underlying reader
      // is still open while graphs are read, and is closed afterwards.
      try (java.io.Reader input = IOUtils.readerFromString(conlluFile)) {
        Iterator<SemanticGraph> it = reader.getIterator(input);
        while (it.hasNext()) {
          graphs.add(it.next());
          graphSources.add(conlluFile);
        }
      }
    }
  }

  for (int graphIndex = 0; graphIndex < graphs.size(); graphIndex++) {
    SemanticGraph graph = graphs.get(graphIndex);
    SemgrexMatcher matcher = semgrex.matcher(graph);
    if (!(matcher.find())) {
      continue;
    }

    if (outputFormat == OutputFormat.LIST) {
      log.info("Matched graph:");
      log.info(graph.toString(SemanticGraph.OutputFormat.LIST));
      // The first find() above already succeeded, so report that match
      // before asking the matcher for the next one.
      boolean found = true;
      while (found) {
        log.info("Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
        List<String> nodeNames = Generics.newArrayList();
        nodeNames.addAll(matcher.getNodeNames());
        Collections.sort(nodeNames);
        for (String name : nodeNames) {
          log.info(" " + name + ": " + matcher.getNode(name).value() + "-" + matcher.getNode(name).index());
        }
        log.info();
        found = matcher.find();
      }
    } else if (outputFormat == OutputFormat.OFFSET) {
      if (graph.vertexListSorted().isEmpty()) {
        continue;
      }
      // Report the file this graph really came from, rather than always the
      // first CoNLL-U file (which may not even have been supplied).
      System.out.printf("+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class), graphSources.get(graphIndex));
    }
  }
}
Use of edu.stanford.nlp.trees.TreeNormalizer in project CoreNLP by stanfordnlp.
Class TaggedFileRecord, method createRecord.
/**
 * Builds a TaggedFileRecord from a comma separated description.
 * <br>
 * A description with no commas is taken to be a file name and produces a
 * record in Format.TEXT with the encoding and tag separator obtained from
 * config.  Otherwise the last comma separated piece is the file name and
 * every preceding piece must have the form key=value, where key is matched
 * case-insensitively against the option names known to this class.
 *
 * @param config properties passed to getEncoding and getTagSeparator to
 *               obtain the default encoding and tag separator
 * @param description the file name, optionally preceded by key=value options
 * @return the record described by description
 * @throws IllegalArgumentException if an option contains no = or its key is
 *         not a known option name
 */
public static TaggedFileRecord createRecord(Properties config, String description) {
  final String[] fields = description.split(",");
  if (fields.length == 1) {
    return new TaggedFileRecord(description, Format.TEXT, getEncoding(config), getTagSeparator(config), null, null, null, null, null, null, null);
  }

  // Everything but the last field is an option; the last field is the file.
  final int optionCount = fields.length - 1;
  final String[] options = new String[optionCount];
  System.arraycopy(fields, 0, options, 0, optionCount);
  final String file = fields[optionCount];

  // Defaults, overridden below by whichever options are present.
  Format format = Format.TEXT;
  String encoding = getEncoding(config);
  String tagSeparator = getTagSeparator(config);
  TreeTransformer treeTransformer = null;
  TreeNormalizer treeNormalizer = null;
  TreeReaderFactory trf = null;
  NumberRangesFileFilter treeRange = null;
  Predicate<Tree> treeFilter = null;
  Integer wordColumn = null;
  Integer tagColumn = null;

  for (String option : options) {
    // Split on the first = only, so the value itself may contain =.
    final String[] keyValue = option.split("=", 2);
    if (keyValue.length != 2) {
      throw new IllegalArgumentException("TaggedFileRecord argument " + option + " has an unexpected number of =s");
    }
    final String key = keyValue[0];
    final String value = keyValue[1];

    if (key.equalsIgnoreCase(FORMAT)) {
      format = Format.valueOf(value);
    } else if (key.equalsIgnoreCase(ENCODING)) {
      encoding = value;
    } else if (key.equalsIgnoreCase(TAG_SEPARATOR)) {
      tagSeparator = value;
    } else if (key.equalsIgnoreCase(TREE_TRANSFORMER)) {
      treeTransformer = ReflectionLoading.loadByReflection(value);
    } else if (key.equalsIgnoreCase(TREE_NORMALIZER)) {
      treeNormalizer = ReflectionLoading.loadByReflection(value);
    } else if (key.equalsIgnoreCase(TREE_READER)) {
      trf = ReflectionLoading.loadByReflection(value);
    } else if (key.equalsIgnoreCase(TREE_RANGE)) {
      // Commas already separate the options, so a range is written with
      // colons and converted back to commas here.
      treeRange = new NumberRangesFileFilter(value.replace(':', ','), true);
    } else if (key.equalsIgnoreCase(TREE_FILTER)) {
      treeFilter = ReflectionLoading.loadByReflection(value);
    } else if (key.equalsIgnoreCase(WORD_COLUMN)) {
      wordColumn = Integer.valueOf(value);
    } else if (key.equalsIgnoreCase(TAG_COLUMN)) {
      tagColumn = Integer.valueOf(value);
    } else {
      throw new IllegalArgumentException("TaggedFileRecord argument " + key + " is unknown");
    }
  }

  return new TaggedFileRecord(file, format, encoding, tagSeparator, treeTransformer, treeNormalizer, trf, treeRange, treeFilter, wordColumn, tagColumn);
}
Aggregations