Use of edu.stanford.nlp.trees.MemoryTreebank in project CoreNLP by stanfordnlp.
Class SemgrexPattern, method main.
/**
 * Prints out all matches of a semgrex pattern on a file of dependencies.
 * <br>
 * Usage:<br>
 * java edu.stanford.nlp.semgraph.semgrex.SemgrexPattern [args]
 * <br>
 * See the help() function for a list of possible arguments to provide.
 * <br>
 * Graphs are collected from every file given with the TREE_FILE flag (trees
 * converted to dependency graphs) and the CONLLU_FILE flag (graphs read
 * directly).  Graphs without a match are skipped.  In LIST output format each
 * match and its named nodes are logged; in OFFSET output format one line
 * {@code +<line> <file>} is printed to stdout per matching graph, where
 * {@code <file>} is the file that particular graph was loaded from.
 * <br>
 * If no pattern is supplied, help() is printed and the JVM exits with status 2.
 *
 * @param args command line flags
 * @throws IOException if a CoNLL-U file cannot be opened, read or closed
 */
public static void main(String[] args) throws IOException {
  Map<String, Integer> flagMap = Generics.newHashMap();
  flagMap.put(PATTERN, 1);
  flagMap.put(TREE_FILE, 1);
  flagMap.put(MODE, 1);
  flagMap.put(EXTRAS, 1);
  flagMap.put(CONLLU_FILE, 1);
  flagMap.put(OUTPUT_FORMAT_OPTION, 1);
  Map<String, String[]> argsMap = StringUtils.argsToMap(args, flagMap);

  // TODO: allow patterns to be extracted from a file
  if (!(argsMap.containsKey(PATTERN)) || argsMap.get(PATTERN).length == 0) {
    help();
    System.exit(2);
  }
  SemgrexPattern semgrex = SemgrexPattern.compile(argsMap.get(PATTERN)[0]);

  // Enum names are looked up after upper-casing; Locale.ROOT keeps the result
  // independent of the default locale (e.g. Turkish dotted/dotless i).
  String modeString = DEFAULT_MODE;
  if (argsMap.containsKey(MODE) && argsMap.get(MODE).length > 0) {
    modeString = argsMap.get(MODE)[0].toUpperCase(java.util.Locale.ROOT);
  }
  SemanticGraphFactory.Mode mode = SemanticGraphFactory.Mode.valueOf(modeString);

  String outputFormatString = DEFAULT_OUTPUT_FORMAT;
  if (argsMap.containsKey(OUTPUT_FORMAT_OPTION) && argsMap.get(OUTPUT_FORMAT_OPTION).length > 0) {
    outputFormatString = argsMap.get(OUTPUT_FORMAT_OPTION)[0].toUpperCase(java.util.Locale.ROOT);
  }
  OutputFormat outputFormat = OutputFormat.valueOf(outputFormatString);

  boolean useExtras = true;
  if (argsMap.containsKey(EXTRAS) && argsMap.get(EXTRAS).length > 0) {
    useExtras = Boolean.parseBoolean(argsMap.get(EXTRAS)[0]);
  }

  List<SemanticGraph> graphs = Generics.newArrayList();
  // Parallel to graphs: graphSources.get(i) is the file graphs.get(i) came from.
  // Needed so OFFSET output reports the right file for every graph.
  List<String> graphSources = Generics.newArrayList();

  // TODO: allow other sources of graphs, such as dependency files
  if (argsMap.containsKey(TREE_FILE) && argsMap.get(TREE_FILE).length > 0) {
    for (String treeFile : argsMap.get(TREE_FILE)) {
      log.info("Loading file " + treeFile);
      MemoryTreebank treebank = new MemoryTreebank(new TreeNormalizer());
      treebank.loadPath(treeFile);
      for (Tree tree : treebank) {
        // TODO: allow other languages... this defaults to English
        SemanticGraph graph = SemanticGraphFactory.makeFromTree(tree, mode, useExtras ? GrammaticalStructure.Extras.MAXIMAL : GrammaticalStructure.Extras.NONE);
        graphs.add(graph);
        graphSources.add(treeFile);
      }
    }
  }

  if (argsMap.containsKey(CONLLU_FILE) && argsMap.get(CONLLU_FILE).length > 0) {
    CoNLLUDocumentReader reader = new CoNLLUDocumentReader();
    for (String conlluFile : argsMap.get(CONLLU_FILE)) {
      log.info("Loading file " + conlluFile);
      // The iterator is drained inside the try block, so the underlying
      // reader can be closed as soon as this file has been consumed.
      try (java.io.BufferedReader in = IOUtils.readerFromString(conlluFile)) {
        Iterator<SemanticGraph> it = reader.getIterator(in);
        while (it.hasNext()) {
          graphs.add(it.next());
          graphSources.add(conlluFile);
        }
      }
    }
  }

  for (int i = 0; i < graphs.size(); i++) {
    SemanticGraph graph = graphs.get(i);
    SemgrexMatcher matcher = semgrex.matcher(graph);
    if (!(matcher.find())) {
      continue;
    }
    if (outputFormat == OutputFormat.LIST) {
      log.info("Matched graph:");
      log.info(graph.toString(SemanticGraph.OutputFormat.LIST));
      boolean found = true;
      while (found) {
        log.info("Matches at: " + matcher.getMatch().value() + "-" + matcher.getMatch().index());
        // Sort the node names so the report order is stable.
        List<String> nodeNames = Generics.newArrayList();
        nodeNames.addAll(matcher.getNodeNames());
        Collections.sort(nodeNames);
        for (String name : nodeNames) {
          log.info(" " + name + ": " + matcher.getNode(name).value() + "-" + matcher.getNode(name).index());
        }
        log.info();
        found = matcher.find();
      }
    } else if (outputFormat == OutputFormat.OFFSET) {
      if (graph.vertexListSorted().isEmpty()) {
        continue;
      }
      // Report the file this graph was actually read from.  Previously the
      // first CONLLU_FILE argument was always used, which threw a
      // NullPointerException when only TREE_FILE was given.
      System.out.printf("+%d %s%n", graph.vertexListSorted().get(0).get(CoreAnnotations.LineNumberAnnotation.class), graphSources.get(i));
    }
  }
}
Use of edu.stanford.nlp.trees.MemoryTreebank in project CoreNLP by stanfordnlp.
Class SemanticGraphPrinter, method main.
/**
 * Builds dependency graphs for a set of trees and prints them to stdout.
 * <br>
 * The trees come from one of three places, chosen by the properties parsed
 * from {@code args}:
 * <ul>
 * <li>{@code -treeFile file}: trees are loaded from that path;</li>
 * <li>{@code -sentFile file}: each line of the file is tokenized and parsed
 *     with a LexicalizedParser, and the resulting trees are used;</li>
 * <li>neither: a usage message is logged and one built-in example tree is used.</li>
 * </ul>
 * For every tree the uncollapsed dependency graph is printed in its default
 * and compact string forms.  If {@code -testGraph} is {@code "true"}, the
 * collapsed graph is additionally printed in several output formats.
 * <br>
 * {@code -load} and {@code -save} are recognized but not implemented:
 * {@code -load} logs a message and returns immediately, {@code -save} logs a
 * message after all output has been produced.
 *
 * @param args command line flags, parsed with StringUtils.argsToProperties
 * @throws RuntimeIOException if the sentence file cannot be opened
 * @throws RuntimeException if reading or parsing the sentence file fails
 */
public static void main(String[] args) {
  Treebank tb = new MemoryTreebank();
  Properties props = StringUtils.argsToProperties(args);
  String treeFileName = props.getProperty("treeFile");
  String sentFileName = props.getProperty("sentFile");
  String testGraph = props.getProperty("testGraph");
  if (testGraph == null) {
    testGraph = "false";
  }
  String load = props.getProperty("load");
  String save = props.getProperty("save");

  if (load != null) {
    log.info("Load not implemented!");
    return;
  }

  if (sentFileName == null && treeFileName == null) {
    log.info("Usage: java SemanticGraph [-sentFile file|-treeFile file] [-testGraph]");
    Tree t = Tree.valueOf("(ROOT (S (NP (NP (DT An) (NN attempt)) (PP (IN on) (NP (NP (NNP Andres) (NNP Pastrana) (POS 's)) (NN life)))) (VP (VBD was) (VP (VBN carried) (PP (IN out) (S (VP (VBG using) (NP (DT a) (JJ powerful) (NN bomb))))))) (. .)))");
    tb.add(t);
  } else if (treeFileName != null) {
    tb.loadPath(treeFileName);
  } else {
    String[] options = { "-retainNPTmpSubcategories" };
    LexicalizedParser lp = LexicalizedParser.loadModel("/u/nlp/data/lexparser/englishPCFG.ser.gz", options);

    // Opening is kept separate from reading so that a missing or unreadable
    // file is still reported as a RuntimeIOException.
    final BufferedReader reader;
    try {
      reader = IOUtils.readerFromString(sentFileName);
    } catch (IOException e) {
      throw new RuntimeIOException("Cannot find or open " + sentFileName, e);
    }

    // try-with-resources closes the reader even when tokenizing or parsing
    // throws; previously close() was only reached on the success path.
    try (BufferedReader in = reader) {
      System.out.println("Processing sentence file " + sentFileName);
      for (String line; (line = in.readLine()) != null; ) {
        System.out.println("Processing sentence: " + line);
        PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(line));
        List<Word> words = ptb.tokenize();
        Tree parseTree = lp.parseTree(words);
        tb.add(parseTree);
      }
    } catch (Exception e) {
      throw new RuntimeException("Exception reading key file " + sentFileName, e);
    }
  }

  for (Tree t : tb) {
    SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(t);
    System.out.println(sg.toString());
    System.out.println(sg.toCompactString());
    if (testGraph.equals("true")) {
      SemanticGraph g1 = SemanticGraphFactory.generateCollapsedDependencies(t);
      System.out.println("TEST SEMANTIC GRAPH - graph ----------------------------");
      System.out.println(g1.toString());
      System.out.println("readable ----------------------------");
      System.out.println(g1.toString(SemanticGraph.OutputFormat.READABLE));
      System.out.println("List of dependencies ----------------------------");
      System.out.println(g1.toList());
      System.out.println("xml ----------------------------");
      System.out.println(g1.toString(SemanticGraph.OutputFormat.XML));
      System.out.println("dot ----------------------------");
      System.out.println(g1.toDotFormat());
      System.out.println("dot (simple) ----------------------------");
      System.out.println(g1.toDotFormat("Simple", CoreLabel.OutputFormat.VALUE));
    }
  }

  if (save != null) {
    log.info("Save not implemented!");
  }
}
Use of edu.stanford.nlp.trees.MemoryTreebank in project CoreNLP by stanfordnlp.
Class OracleTest, method buildTestTreebank.
/**
 * Converts every entry of TEST_TREES into a Tree via Tree.valueOf, gathers
 * the trees in a fresh MemoryTreebank, and returns the result of running
 * ShiftReduceParser.binarizeTreebank over it with a default Options instance.
 *
 * @return the binarized trees produced from TEST_TREES
 */
public List<Tree> buildTestTreebank() {
  final MemoryTreebank parsedTrees = new MemoryTreebank();
  for (String treeText : TEST_TREES) {
    parsedTrees.add(Tree.valueOf(treeText));
  }
  return ShiftReduceParser.binarizeTreebank(parsedTrees, new Options());
}
Aggregations