Use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.
From the class ReadSentimentDataset, method convertTree.
/**
 * Builds a sentiment-labeled tree for one sentence from its parent-pointer encoding.
 *
 * <p>Entry {@code i} of {@code parentPointers} is the index of node {@code i}'s parent, with
 * {@code -1} marking the root. Nodes {@code 0 .. sentence.size() - 1} are created as
 * preterminals that each wrap one word as a leaf; the remaining nodes, up to the largest parent
 * index, are created as empty internal nodes and wired together via {@code connect}.
 *
 * <p>Every node is then labeled with a sentiment class in {@code 0..4}, computed as
 * {@code floor(score * 5)} capped at 4, where the score is looked up through the phrase id of
 * the words the node spans. Finally the leaf words are escaped with {@code escaper} and the
 * configured tsurgeon patterns are applied to the root.
 *
 * @param parentPointers parent index for each node, {@code -1} for the root
 * @param sentence the words of the sentence, in order
 * @param phraseIds maps a phrase (list of words) to its phrase id
 * @param sentimentScores maps a phrase id to its sentiment score
 * @param escaper used to escape the leaf words after the phrase lookups are done
 * @return the root of the labeled tree, after the tsurgeon patterns have been applied
 * @throws RuntimeException if there is more than one root or no root at all, if a phrase has
 *     no phrase id, or if a phrase id has no sentiment score
 */
public static Tree convertTree(List<Integer> parentPointers, List<String> sentence, Map<List<String>, Integer> phraseIds, Map<Integer, Double> sentimentScores, PTBEscapingProcessor escaper) {
  int maxNode = 0;
  for (Integer parent : parentPointers) {
    maxNode = Math.max(maxNode, parent);
  }

  Tree[] subtrees = new Tree[maxNode + 1];
  // One preterminal per word, each with the word itself as its only child.
  for (int i = 0; i < sentence.size(); ++i) {
    CoreLabel word = new CoreLabel();
    word.setValue(sentence.get(i));
    Tree leaf = new LabeledScoredTreeNode(word);
    subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
    subtrees[i].addChild(leaf);
  }
  // The remaining indices are internal nodes; they start out without children.
  for (int i = sentence.size(); i <= maxNode; ++i) {
    subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
  }

  boolean[] connected = new boolean[maxNode + 1];
  Tree root = null;
  for (int index = 0; index < parentPointers.size(); ++index) {
    if (parentPointers.get(index) == -1) {
      if (root != null) {
        throw new RuntimeException("Found two roots for sentence " + sentence);
      }
      root = subtrees[index];
    } else {
      // Walk up the tree structure to make sure that leftmost
      // phrases are added first.  Otherwise, if the numbers are
      // inverted, we might get the right phrase added to a parent
      // first, resulting in "case zero in this", for example,
      // instead of "in this case zero"
      // Note that because we keep track of which ones are already
      // connected, we process this at most once per parent, so the
      // overall construction time is still efficient.
      connect(parentPointers, subtrees, connected, index);
    }
  }
  // Without this check a sentence with no -1 pointer would pass a null root to Tsurgeon below.
  if (root == null) {
    throw new RuntimeException("Found no root for sentence " + sentence);
  }

  for (int i = 0; i <= maxNode; ++i) {
    List<Tree> leaves = subtrees[i].getLeaves();
    List<String> words = CollectionUtils.transformAsList(leaves, TRANSFORM_TREE_TO_WORD);
    // First we look for a copy of the phrase with -LRB- -RRB-
    // instead of ().  The sentiment trees sometimes have both, and
    // the escaped versions seem to have more reasonable scores.
    // If a particular phrase doesn't have -LRB- -RRB- we fall back
    // to the unescaped versions.
    Integer phraseId = phraseIds.get(CollectionUtils.transformAsList(words, TRANSFORM_PARENS));
    if (phraseId == null) {
      phraseId = phraseIds.get(words);
    }
    if (phraseId == null) {
      // Report the phrase that actually failed the lookup, not just the enclosing sentence.
      throw new RuntimeException("Could not find phrase id for phrase " + words + " in sentence " + sentence);
    }
    // TODO: should we make this an option?  Perhaps we want cases
    // where the trees have the phrase id and not their class
    Double score = sentimentScores.get(phraseId);
    if (score == null) {
      throw new RuntimeException("Could not find sentiment score for phrase id " + phraseId);
    }
    // TODO: make this a numClasses option
    // Bucket the score into five classes; a score of exactly 1.0 falls into the top class.
    int classLabel = Math.round((float) Math.floor(score * 5.0));
    if (classLabel > 4) {
      classLabel = 4;
    }
    subtrees[i].label().setValue(Integer.toString(classLabel));
  }

  // Escape the words only now: the phrase lookups above use the words as given in the sentence.
  for (int i = 0; i < sentence.size(); ++i) {
    Tree leaf = subtrees[i].children()[0];
    leaf.label().setValue(escaper.escapeString(leaf.label().value()));
  }

  for (int i = 0; i < tregexPatterns.length; ++i) {
    root = Tsurgeon.processPattern(tregexPatterns[i], tsurgeonPatterns[i], root);
  }
  return root;
}
Use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.
From the class ShiftReduceParser, method initialStateFromTaggedSentence.
/**
 * Creates a parser {@code State} from a sentence whose words already carry tags.
 *
 * <p>For every input word a two-level tree is built: a preterminal labeled with the tag, with
 * the word as its single child. Both labels receive a 1-based index and are annotated with the
 * head word label and the head tag label.
 *
 * <p>When an input word is already a {@code CoreLabel} it is used directly as the word label,
 * so its index and head annotations are overwritten in place.
 *
 * @param words the tagged sentence
 * @return a state constructed from the list of preterminals
 * @throws IllegalArgumentException if a word is neither a {@code CoreLabel} nor a
 *     {@code HasTag}, or if its tag is {@code null}
 */
public static State initialStateFromTaggedSentence(List<? extends HasWord> words) {
  List<Tree> tagNodes = Generics.newArrayList();
  int position = 0;
  for (HasWord token : words) {
    // Index from 1.  Tools downstream from the parser expect that.
    ++position;

    final CoreLabel wordLabel;
    final String tag;
    if (!(token instanceof CoreLabel)) {
      // Not a CoreLabel: copy the word (and tag) into a fresh label.
      wordLabel = new CoreLabel();
      wordLabel.setValue(token.word());
      wordLabel.setWord(token.word());
      if (!(token instanceof HasTag)) {
        throw new IllegalArgumentException("Expected tagged words");
      }
      tag = ((HasTag) token).tag();
      wordLabel.setTag(tag);
    } else {
      wordLabel = (CoreLabel) token;
      tag = wordLabel.tag();
    }
    if (tag == null) {
      throw new IllegalArgumentException("Input word not tagged");
    }

    final CoreLabel tagLabel = new CoreLabel();
    tagLabel.setValue(tag);

    // Internally this parser uses the index, so we have to
    // overwrite incorrect indices if the label is already indexed
    wordLabel.setIndex(position);
    tagLabel.setIndex(position);

    // TODO: can we get away with not setting these on the wordLabel?
    wordLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
    wordLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel);
    tagLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
    tagLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel);

    final Tree preterminal = new LabeledScoredTreeNode(tagLabel);
    preterminal.addChild(new LabeledScoredTreeNode(wordLabel));
    tagNodes.add(preterminal);
  }
  return new State(tagNodes);
}
Use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.
From the class BinaryTransition, method apply.
/**
 * Replaces the two topmost trees on the stack with a new binary node that has them as its
 * children (left first, then right).
 *
 * <p>The new node is labeled with this transition's {@code label}. Its head word and head tag
 * annotations are copied from whichever child this transition's {@code side} selects.
 *
 * @param state the state to apply this transition to
 * @param scoreDelta amount added to the state's score
 * @return a new state with the updated stack, this transition pushed onto the transition
 *     history, and the score increased by {@code scoreDelta}
 * @throws IllegalArgumentException if {@code side} is not a known value or the selected
 *     head's label is not a {@code CoreLabel}
 */
public State apply(State state, double scoreDelta) {
  // The top of the stack is the right child; the element below it is the left child.
  TreeShapedStack<Tree> remaining = state.stack;
  final Tree rightChild = remaining.peek();
  remaining = remaining.pop();
  final Tree leftChild = remaining.peek();
  remaining = remaining.pop();

  final Tree headChild;
  switch (side) {
    case LEFT:
      headChild = leftChild;
      break;
    case RIGHT:
      headChild = rightChild;
      break;
    default:
      throw new IllegalArgumentException("Unknown side " + side);
  }

  if (!(headChild.label() instanceof CoreLabel)) {
    throw new IllegalArgumentException("Stack should have CoreLabel nodes");
  }
  final CoreLabel headLabel = (CoreLabel) headChild.label();

  // The new node inherits its head information from the selected child.
  final CoreLabel parentLabel = new CoreLabel();
  parentLabel.setValue(label);
  parentLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class,
                  headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
  parentLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class,
                  headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));

  final Tree parent = new LabeledScoredTreeNode(parentLabel);
  parent.addChild(leftChild);
  parent.addChild(rightChild);

  return new State(remaining.push(parent),
                   state.transitions.push(this),
                   state.separators,
                   state.sentence,
                   state.tokenPosition,
                   state.score + scoreDelta,
                   false);
}
Use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.
From the class UnaryTransition, method createNode.
/**
 * Builds a new node labeled {@code label} whose head word and head tag annotations are copied
 * from the label of {@code top}, and attaches {@code children} to it in the order given.
 *
 * @param top the tree whose label supplies the head annotations; its label is cast to
 *     {@code CoreLabel}
 * @param label the value for the new node's label
 * @param children the trees to add as children of the new node
 * @return the newly created node
 */
static Tree createNode(Tree top, String label, Tree... children) {
  final CoreLabel source = (CoreLabel) top.label();

  final CoreLabel nodeLabel = new CoreLabel();
  nodeLabel.setValue(label);
  nodeLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class,
                source.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
  nodeLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class,
                source.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));

  final Tree node = new LabeledScoredTreeNode(nodeLabel);
  for (int i = 0; i < children.length; ++i) {
    node.addChild(children[i]);
  }
  return node;
}
Use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.
From the class ProtobufAnnotationSerializer, method fromProto.
/**
 * Retrieve a Tree object from a saved protobuf.
 * This is not intended to be used on its own, but it is safe (lossless) to do so and therefore it is
 * left visible.
 *
 * <p>The conversion recurses over the proto's children. The label, and with it the span and
 * sentiment annotations, is only set when the proto carries a value; the score is only set when
 * the proto carries one.
 *
 * @param proto The serialized tree.
 * @return A Tree object corresponding to the saved tree. This will always be a {@link LabeledScoredTreeNode}.
 * @throws RuntimeInterruptedException if the current thread has been interrupted
 */
public Tree fromProto(CoreNLPProtos.ParseTree proto) {
  // Checked on every recursive call. Note that Thread.interrupted() also clears the interrupt flag.
  if (Thread.interrupted()) {
    throw new RuntimeInterruptedException();
  }
  LabeledScoredTreeNode node = new LabeledScoredTreeNode();
  // Set label
  if (proto.hasValue()) {
    CoreLabel value = new CoreLabel();
    value.setCategory(proto.getValue());
    value.setValue(proto.getValue());
    node.setLabel(value);
    // Set span
    if (proto.hasYieldBeginIndex() && proto.hasYieldEndIndex()) {
      IntPair span = new IntPair(proto.getYieldBeginIndex(), proto.getYieldEndIndex());
      value.set(SpanAnnotation.class, span);
    }
    // Set sentiment
    if (proto.hasSentiment()) {
      value.set(RNNCoreAnnotations.PredictedClass.class, proto.getSentiment().getNumber());
    }
  }
  // Set score
  if (proto.hasScore()) {
    node.setScore(proto.getScore());
  }
  // Set children
  // Allocate a plain Tree[] rather than a LabeledScoredTreeNode[]: an array whose runtime
  // element type is the subclass rejects any other Tree implementation with an
  // ArrayStoreException (e.g. the result of an overriding fromProto).
  Tree[] children = new Tree[proto.getChildCount()];
  for (int i = 0; i < children.length; ++i) {
    children[i] = fromProto(proto.getChild(i));
  }
  node.setChildren(children);
  // Return
  return node;
}
Aggregations