Use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.
Class DependencyParser, method parseTextFile.
private void parseTextFile(BufferedReader input, PrintWriter output) {
  DocumentPreprocessor preprocessor = new DocumentPreprocessor(input);
  preprocessor.setSentenceFinalPuncWords(config.tlp.sentenceFinalPunctuationWords());
  preprocessor.setEscaper(config.escaper);
  preprocessor.setSentenceDelimiter(config.sentenceDelimiter);
  preprocessor.setTokenizerFactory(config.tlp.getTokenizerFactory());

  Timing timer = new Timing();

  MaxentTagger tagger = new MaxentTagger(config.tagger);
  List<List<TaggedWord>> tagged = new ArrayList<>();
  for (List<HasWord> sentence : preprocessor) {
    tagged.add(tagger.tagSentence(sentence));
  }
  System.err.printf("Tagging completed in %.2f sec.%n", timer.stop() / 1000.0);

  timer.start();
  int numSentences = 0;
  for (List<TaggedWord> taggedSentence : tagged) {
    GrammaticalStructure parse = predict(taggedSentence);

    Collection<TypedDependency> deps = parse.typedDependencies();
    for (TypedDependency dep : deps) {
      output.println(dep);
    }
    output.println();
    numSentences++;
  }

  long millis = timer.stop();
  double seconds = millis / 1000.0;
  System.err.printf("Parsed %d sentences in %.2f seconds (%.2f sents/sec).%n",
      numSentences, seconds, numSentences / seconds);
}
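The loop above prints each TypedDependency with its default toString() rendering (relation, governor, dependent). If a more structured, CoNLL-style line per token is wanted, the same Collection<TypedDependency> can be walked explicitly. A minimal sketch, assuming the GrammaticalStructure was built from tagged input so that tag() is populated; the class name ConllLikeWriter and the tab-separated column layout are illustrative, not part of CoreNLP:

import java.io.PrintWriter;
import java.util.Collection;

import edu.stanford.nlp.ling.IndexedWord;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;

public class ConllLikeWriter {

  /** Writes one tab-separated line per dependent token:
   *  index, word form, POS tag, governor index, relation name. */
  public static void writeConllLike(GrammaticalStructure gs, PrintWriter output) {
    Collection<TypedDependency> deps = gs.typedDependencies();
    for (TypedDependency dep : deps) {
      IndexedWord d = dep.dep();   // the dependent token
      IndexedWord g = dep.gov();   // the governor (index 0 for the root relation)
      output.printf("%d\t%s\t%s\t%d\t%s%n",
          d.index(), d.word(), d.tag(), g.index(), dep.reln().toString());
    }
    output.println();  // blank line between sentences, as in parseTextFile above
  }
}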
Use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.
Class DependencyParserDemo, method main.
public static void main(String[] args) {
  String modelPath = DependencyParser.DEFAULT_MODEL;
  String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";

  for (int argIndex = 0; argIndex < args.length; ) {
    switch (args[argIndex]) {
      case "-tagger":
        taggerPath = args[argIndex + 1];
        argIndex += 2;
        break;
      case "-model":
        modelPath = args[argIndex + 1];
        argIndex += 2;
        break;
      default:
        throw new RuntimeException("Unknown argument " + args[argIndex]);
    }
  }

  String text = "I can almost always tell when movies use fake dinosaurs.";

  MaxentTagger tagger = new MaxentTagger(taggerPath);
  DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);

  DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
  for (List<HasWord> sentence : tokenizer) {
    List<TaggedWord> tagged = tagger.tagSentence(sentence);
    GrammaticalStructure gs = parser.predict(tagged);

    // Print typed dependencies
    log.info(gs);
  }
}
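Here log.info(gs) prints the GrammaticalStructure's default rendering. To work with individual relations instead, the loop body can iterate the TypedDependency objects directly. A sketch built on the same model and tagger paths as the demo above; the class name TypedDependencyDemo and the tab-separated output are illustrative:

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.ling.TaggedWord;
import edu.stanford.nlp.parser.nndep.DependencyParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.tagger.maxent.MaxentTagger;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.TypedDependency;

public class TypedDependencyDemo {
  public static void main(String[] args) {
    MaxentTagger tagger = new MaxentTagger(
        "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger");
    DependencyParser parser = DependencyParser.loadFromModelFile(DependencyParser.DEFAULT_MODEL);

    String text = "I can almost always tell when movies use fake dinosaurs.";
    for (List<HasWord> sentence : new DocumentPreprocessor(new StringReader(text))) {
      List<TaggedWord> tagged = tagger.tagSentence(sentence);
      GrammaticalStructure gs = parser.predict(tagged);
      // One line per relation: short name, governor, dependent (both print as word-index).
      for (TypedDependency td : gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL)) {
        System.out.println(td.reln().getShortName() + "\t" + td.gov() + "\t" + td.dep());
      }
    }
  }
}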
Use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.
Class DependencyIndexITest, method checkTree.
private static void checkTree(Tree tree) {
  List<Tree> leaves = tree.getLeaves();
  for (Tree leaf : leaves) {
    CoreLabel l = null;
    if (leaf.label() instanceof CoreLabel) {
      l = (CoreLabel) leaf.label();
    }
    if (l != null) {
      // System.err.println(l + " " + l.get(CoreAnnotations.IndexAnnotation.class));
      int index = l.get(CoreAnnotations.IndexAnnotation.class);
      String text = l.get(CoreAnnotations.TextAnnotation.class);
      if (text.equals("Mary")) assertEquals(1, index);
      else if (text.equals("had")) assertEquals(2, index);
      else if (text.equals("a")) assertEquals(3, index);
      else if (text.equals("little")) assertEquals(4, index);
      else if (text.equals("lamb")) assertEquals(5, index);
      else if (text.equals(".")) assertEquals(6, index);
    } else {
      // System.err.println(leaf + " is not a CoreLabel.");
    }
  }

  TreebankLanguagePack tlp = new PennTreebankLanguagePack();
  GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
  GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);
  Collection<TypedDependency> deps = gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL);
  // System.out.println(deps);

  // collect all nodes in deps
  Set<IndexedWord> nodes = Generics.newHashSet();
  for (TypedDependency dep : deps) {
    nodes.add(dep.gov());
    nodes.add(dep.dep());
  }

  // check the indices for all nodes
  for (IndexedWord n : nodes) {
    String text = n.value();
    int index = n.get(CoreAnnotations.IndexAnnotation.class);
    if (text.equals("Mary")) assertEquals(1, index);
    else if (text.equals("had")) assertEquals(2, index);
    else if (text.equals("a")) assertEquals(3, index);
    else if (text.equals("little")) assertEquals(4, index);
    else if (text.equals("lamb")) assertEquals(5, index);
    else if (text.equals(".")) assertEquals(6, index);
  }
}
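The GrammaticalStructureFactory pattern used at the end of checkTree applies to any constituency tree, not just the ones constructed by the test. A sketch that reads a hand-written Penn bracketing of the test sentence with Tree.valueOf; the bracketing and the class name TreeToDependencies are illustrative:

import java.util.Collection;

import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TreebankLanguagePack;
import edu.stanford.nlp.trees.TypedDependency;

public class TreeToDependencies {
  public static void main(String[] args) {
    // Hand-written parse of the test sentence.
    Tree tree = Tree.valueOf(
        "(ROOT (S (NP (NNP Mary)) (VP (VBD had) (NP (DT a) (JJ little) (NN lamb))) (. .)))");

    TreebankLanguagePack tlp = new PennTreebankLanguagePack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    GrammaticalStructure gs = gsf.newGrammaticalStructure(tree);

    // typedDependencies() gives the basic representation; the CC-processed variant
    // collapses prepositions/conjunctions and may add extra edges.
    Collection<TypedDependency> basic = gs.typedDependencies();
    Collection<TypedDependency> ccProcessed =
        gs.typedDependenciesCCprocessed(GrammaticalStructure.Extras.MAXIMAL);

    System.out.println("basic:        " + basic);
    System.out.println("CC-processed: " + ccProcessed);
  }
}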
Use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.
Class DependencyParseAnnotator, method doOneSentence.
@Override
protected void doOneSentence(Annotation annotation, CoreMap sentence) {
  GrammaticalStructure gs = parser.predict(sentence);

  SemanticGraph deps = SemanticGraphFactory.makeFromTree(gs, Mode.COLLAPSED, extraDependencies, null);
  SemanticGraph uncollapsedDeps = SemanticGraphFactory.makeFromTree(gs, Mode.BASIC, extraDependencies, null);
  SemanticGraph ccDeps = SemanticGraphFactory.makeFromTree(gs, Mode.CCPROCESSED, extraDependencies, null);
  SemanticGraph enhancedDeps = SemanticGraphFactory.makeFromTree(gs, Mode.ENHANCED, extraDependencies, null);
  SemanticGraph enhancedPlusPlusDeps = SemanticGraphFactory.makeFromTree(gs, Mode.ENHANCED_PLUS_PLUS, extraDependencies, null);

  sentence.set(SemanticGraphCoreAnnotations.CollapsedDependenciesAnnotation.class, deps);
  sentence.set(SemanticGraphCoreAnnotations.BasicDependenciesAnnotation.class, uncollapsedDeps);
  sentence.set(SemanticGraphCoreAnnotations.CollapsedCCProcessedDependenciesAnnotation.class, ccDeps);
  sentence.set(SemanticGraphCoreAnnotations.EnhancedDependenciesAnnotation.class, enhancedDeps);
  sentence.set(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class, enhancedPlusPlusDeps);
}
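Downstream code reads these graphs back off the sentence CoreMap; in practice the annotator usually runs inside a StanfordCoreNLP pipeline rather than being called directly. A sketch of the consuming side, assuming the standard tokenize,ssplit,pos,depparse annotator chain; the class name DepparsePipelineSketch and the choice of the enhanced++ graph are illustrative:

import java.util.Properties;

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.pipeline.StanfordCoreNLP;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations;
import edu.stanford.nlp.util.CoreMap;

public class DepparsePipelineSketch {
  public static void main(String[] args) {
    Properties props = new Properties();
    props.setProperty("annotators", "tokenize,ssplit,pos,depparse");
    StanfordCoreNLP pipeline = new StanfordCoreNLP(props);

    Annotation document = new Annotation("I can almost always tell when movies use fake dinosaurs.");
    pipeline.annotate(document);

    for (CoreMap sentence : document.get(CoreAnnotations.SentencesAnnotation.class)) {
      // The graph stored by DependencyParseAnnotator under the enhanced++ key.
      SemanticGraph sg =
          sentence.get(SemanticGraphCoreAnnotations.EnhancedPlusPlusDependenciesAnnotation.class);
      System.out.println(sg.toList());
    }
  }
}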
Use of edu.stanford.nlp.trees.GrammaticalStructure in project CoreNLP by stanfordnlp.
Class LexicalizedParserServer, method handleDependencies.
// TODO: when this method throws an exception (for whatever reason)
// a waiting client might hang. There should be some graceful
// handling of that.
public void handleDependencies(String arg, OutputStream outStream, String commandArgs) throws IOException {
  Tree tree = parse(arg, false);
  if (tree == null) {
    return;
  }
  // TODO: this might throw an exception if the parser doesn't support dependencies. Handle that cleaner?
  GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(tree,
      parser.treebankLanguagePack().punctuationWordRejectFilter(),
      parser.getTLPParams().typedDependencyHeadFinder());
  Collection<TypedDependency> deps = null;
  switch (commandArgs.toUpperCase()) {
    case "COLLAPSED_TREE":
      deps = gs.typedDependenciesCollapsedTree();
      break;
    default:
      throw new UnsupportedOperationException("Dependencies type not implemented: " + commandArgs);
  }
  OutputStreamWriter osw = new OutputStreamWriter(outStream, "utf-8");
  for (TypedDependency dep : deps) {
    osw.write(dep.toString());
    osw.write("\n");
  }
  osw.flush();
}
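The same COLLAPSED_TREE representation can be produced without the socket layer by parsing with a LexicalizedParser and building the GrammaticalStructure the way the server does above. A sketch assuming the default English PCFG model bundled with the CoreNLP models jar; the class name CollapsedTreeDependencies and the example sentence are illustrative:

import java.io.StringReader;
import java.util.Collection;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.trees.GrammaticalStructure;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TypedDependency;

public class CollapsedTreeDependencies {
  public static void main(String[] args) {
    // Loads the default English PCFG model from the CoreNLP models jar.
    LexicalizedParser parser = LexicalizedParser.loadModel();

    String text = "I can almost always tell when movies use fake dinosaurs.";
    for (List<HasWord> tokens : new DocumentPreprocessor(new StringReader(text))) {
      Tree tree = parser.parse(tokens);

      // Same construction the server uses before switching on the requested dependency type.
      GrammaticalStructure gs = parser.getTLPParams().getGrammaticalStructure(
          tree,
          parser.treebankLanguagePack().punctuationWordRejectFilter(),
          parser.getTLPParams().typedDependencyHeadFinder());

      Collection<TypedDependency> deps = gs.typedDependenciesCollapsedTree();
      for (TypedDependency dep : deps) {
        System.out.println(dep);
      }
    }
  }
}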