Example usage of edu.stanford.nlp.ling.HasWord from the CoreNLP project (stanfordnlp): the parseTextFile method of the DependencyParser class.
/**
 * Tags and dependency-parses plain text read from {@code input}, writing the
 * typed dependencies of each sentence to {@code output} (one dependency per
 * line, with a blank line between sentences). Timing for the tagging and
 * parsing phases is reported on stderr.
 *
 * @param input  reader over raw text; sentence-split and tokenized via
 *               DocumentPreprocessor using this parser's {@code config}
 * @param output destination for the typed-dependency listing
 */
private void parseTextFile(BufferedReader input, PrintWriter output) {
DocumentPreprocessor preprocessor = new DocumentPreprocessor(input);
preprocessor.setSentenceFinalPuncWords(config.tlp.sentenceFinalPunctuationWords());
preprocessor.setEscaper(config.escaper);
preprocessor.setSentenceDelimiter(config.sentenceDelimiter);
preprocessor.setTokenizerFactory(config.tlp.getTokenizerFactory());
MaxentTagger tagger = new MaxentTagger(config.tagger);
// Start the timer only after the tagger model has loaded, so the reported
// figure covers tagging work alone (previously it also counted model load).
Timing timer = new Timing();
List<List<TaggedWord>> tagged = new ArrayList<>();
for (List<HasWord> sentence : preprocessor) {
tagged.add(tagger.tagSentence(sentence));
}
System.err.printf("Tagging completed in %.2f sec.%n", timer.stop() / 1000.0);
timer.start();
int numSentences = 0;
for (List<TaggedWord> taggedSentence : tagged) {
GrammaticalStructure parse = predict(taggedSentence);
Collection<TypedDependency> deps = parse.typedDependencies();
for (TypedDependency dep : deps) {
output.println(dep);
}
output.println();
numSentences++;
}
long millis = timer.stop();
double seconds = millis / 1000.0;
// Guard against a zero-duration run (e.g. empty input) so we never print "Infinity".
double sentsPerSec = seconds > 0.0 ? numSentences / seconds : 0.0;
System.err.printf("Parsed %d sentences in %.2f seconds (%.2f sents/sec).%n", numSentences, seconds, sentsPerSec);
}
Example usage of edu.stanford.nlp.ling.HasWord from the CoreNLP project (stanfordnlp): the main method of the DependencyParserDemo class.
/**
 * Demonstrates the neural dependency parser on a fixed example sentence.
 * Command-line flags:
 * <ul>
 *   <li>{@code -tagger <path>} — POS tagger model to load</li>
 *   <li>{@code -model <path>}  — dependency parser model to load</li>
 * </ul>
 *
 * @param args optional {@code -tagger}/{@code -model} flag/value pairs
 * @throws RuntimeException on an unknown flag or a flag missing its value
 */
public static void main(String[] args) {
String modelPath = DependencyParser.DEFAULT_MODEL;
String taggerPath = "edu/stanford/nlp/models/pos-tagger/english-left3words/english-left3words-distsim.tagger";
for (int argIndex = 0; argIndex < args.length; ) {
switch(args[argIndex]) {
case "-tagger":
// Validate the value exists rather than throwing a bare ArrayIndexOutOfBoundsException.
if (argIndex + 1 >= args.length) {
throw new RuntimeException("Missing value for argument -tagger");
}
taggerPath = args[argIndex + 1];
argIndex += 2;
break;
case "-model":
if (argIndex + 1 >= args.length) {
throw new RuntimeException("Missing value for argument -model");
}
modelPath = args[argIndex + 1];
argIndex += 2;
break;
default:
throw new RuntimeException("Unknown argument " + args[argIndex]);
}
}
String text = "I can almost always tell when movies use fake dinosaurs.";
MaxentTagger tagger = new MaxentTagger(taggerPath);
DependencyParser parser = DependencyParser.loadFromModelFile(modelPath);
DocumentPreprocessor tokenizer = new DocumentPreprocessor(new StringReader(text));
for (List<HasWord> sentence : tokenizer) {
List<TaggedWord> tagged = tagger.tagSentence(sentence);
GrammaticalStructure gs = parser.predict(tagged);
// Print typed dependencies
log.info(gs);
}
}
Example usage of edu.stanford.nlp.ling.HasWord from the CoreNLP project (stanfordnlp): the renderRows method of the ScrollableTreeJPanel class.
/**
 * Renders the sentence yield followed by the differing constituents, packing
 * constituents greedily into rows: each constituent goes into the first row
 * whose existing spans it does not overlap.
 *
 * @param g2            graphics context to draw into
 * @param fM            font metrics used for label sizing
 * @param defaultColor2 unused here; kept for signature compatibility with callers
 */
private void renderRows(Graphics2D g2, FontMetrics fM, Color defaultColor2) {
double nodeHeight = fM.getHeight();
double layerMultiplier = (1.0 + belowLineSkip + aboveLineSkip + parentSkip);
double layerHeight = nodeHeight * layerMultiplier;
//Draw the yield
List<HasWord> sentence = tree.yieldHasWord();
for (int i = 0; i < sentence.size(); i++) {
g2.drawString(sentence.get(i).word(), yieldOffsets[i], (float) (yieldHeight + layerHeight));
}
//Greedily draw the constituents
final float rowOrigin = (float) (yieldHeight + 2.0 * layerHeight);
List<List<IntPair>> rows = new ArrayList<>();
for (Constituent c : diffConstituents) {
// diffConstituents.size() is an upper bound on the number of rows needed.
for (int rowIdx = 0; rowIdx < diffConstituents.size(); rowIdx++) {
float rowHeight = rowOrigin + (float) (rowIdx * layerHeight);
// Shorten the underline by one slot when the span ends at the last token.
int ext = (c.end() == (yieldOffsets.length - 1)) ? 0 : 1;
if (rowIdx >= rows.size()) {
// No such row yet: open a new one and place the constituent there.
rows.add(new ArrayList<>());
rows.get(rowIdx).add(new IntPair(c.start(), c.end()));
drawConstituent(g2, fM, c, rowHeight, ext);
break;
} else {
boolean foundOverlap = false;
for (IntPair span : rows.get(rowIdx)) {
if (doesOverlap(c, span)) {
foundOverlap = true;
break;
}
}
if (!foundOverlap) {
rows.get(rowIdx).add(new IntPair(c.start(), c.end()));
drawConstituent(g2, fM, c, rowHeight, ext);
break;
}
}
}
}
}

/**
 * Draws one constituent's label and its underline at the given row height.
 * The underline's endpoint index can exceed {@code yieldOffsets} when the
 * yields of the two compared trees do not match; both call sites previously
 * duplicated this code and only one guarded against that, so the guard now
 * covers every draw.
 */
private void drawConstituent(Graphics2D g2, FontMetrics fM, Constituent c, float rowHeight, int ext) {
double nodeWidth = fM.stringWidth(c.value());
g2.drawString(c.value(), yieldOffsets[c.start()], rowHeight);
try {
g2.drawLine((int) (yieldOffsets[c.start()] + nodeWidth) + 10, (int) rowHeight, (int) (yieldOffsets[c.end() + ext]) - 15, (int) rowHeight);
} catch (ArrayIndexOutOfBoundsException e) {
// This happens if yield of two compared trees do not match. Just ignore it for now
// System.err.printf("yieldOffsets.length is %d, c.start() is %d, c.end() is %d, ext is %d%n", yieldOffsets.length, c.start(), c.end(), ext);
}
}
Example usage of edu.stanford.nlp.ling.HasWord from the CoreNLP project (stanfordnlp): the isVerbalAuxiliary method of the UniversalSemanticHeadFinder class.
/**
 * Decides whether the given preterminal node is a verbal auxiliary of the
 * desired kind. The tag is taken from the preterminal's label when it carries
 * one (falling back to the node value), and the word from its leaf child's
 * label (falling back to the label value). A match occurs when either the tag
 * alone is an unambiguous auxiliary tag (and {@code allowJustTagMatch} is
 * set), or the tag is verbal and the lowercased word appears in
 * {@code verbalSet}.
 *
 * @param preterminal       candidate tree node; non-preterminals never match
 * @param verbalSet         lowercased auxiliary word forms to accept
 * @param allowJustTagMatch whether an unambiguous auxiliary tag suffices by itself
 * @return true if the node is an auxiliary of the requested kind
 */
private boolean isVerbalAuxiliary(Tree preterminal, Set<String> verbalSet, boolean allowJustTagMatch) {
if (!preterminal.isPreTerminal()) {
return false;
}
Label kidLabel = preterminal.label();
String tag = (kidLabel instanceof HasTag) ? ((HasTag) kidLabel).tag() : null;
if (tag == null) {
tag = preterminal.value();
}
Label wordLabel = preterminal.firstChild().label();
String word = (wordLabel instanceof HasWord) ? ((HasWord) wordLabel).word() : null;
if (word == null) {
word = wordLabel.value();
}
if (DEBUG) {
log.info("Checking " + preterminal.value() + " head is " + word + '/' + tag);
}
String lcWord = word.toLowerCase();
// Named subexpressions make the (A && B) || (C && D) precedence explicit.
boolean tagAloneSuffices = allowJustTagMatch && unambiguousAuxiliaryTags.contains(tag);
boolean tagAndWordMatch = verbalTags.contains(tag) && verbalSet.contains(lcWord);
if (tagAloneSuffices || tagAndWordMatch) {
if (DEBUG) {
log.info("isAuxiliary found desired type of aux");
}
return true;
}
return false;
}
Example usage of edu.stanford.nlp.ling.HasWord from the CoreNLP project (stanfordnlp): the buildWordNode method of the SpanishXMLTreeReader class.
/**
 * Builds a preterminal parse-tree node — a POS-tag node dominating a single
 * word leaf — from the given XML word element. The word and POS strings are
 * run through the tree normalizer, and the word/lemma/tag are also attached
 * to the labels when the label implementations support them.
 *
 * @param root XML node describing one word; must be an {@link Element}
 * @return a new preterminal tree node for the word
 */
private Tree buildWordNode(Node root) {
Element eRoot = (Element) root;
String posStr = treeNormalizer.normalizeNonterminal(getPOS(eRoot));
String lemma = eRoot.getAttribute(ATTR_LEMMA);
String leafStr = treeNormalizer.normalizeTerminal(getWord(eRoot));
Tree leafNode = treeFactory.newLeaf(leafStr);
Label leafLabel = leafNode.label();
if (leafLabel instanceof HasWord) {
((HasWord) leafLabel).setWord(leafStr);
}
if (lemma != null && leafLabel instanceof HasLemma) {
((HasLemma) leafLabel).setLemma(lemma);
}
List<Tree> children = new ArrayList<>();
children.add(leafNode);
Tree posNode = treeFactory.newTreeNode(posStr, children);
if (posNode.label() instanceof HasTag) {
((HasTag) posNode.label()).setTag(posStr);
}
return posNode;
}
End of aggregated usage examples.