Search in sources :

Example 1 with LabeledScoredTreeNode

use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.

In the class ReadSentimentDataset, method convertTree:

/**
 * Builds a sentiment-labeled tree for one sentence from a parent-pointer encoding.
 *
 * <p>One node is created per index from 0 up to the largest value in
 * {@code parentPointers}. The first {@code sentence.size()} nodes each receive a
 * single leaf child holding the corresponding word. Every node is then labeled
 * with a class computed as {@code floor(score * 5)}, capped at 4, where the score
 * is looked up through {@code phraseIds} and {@code sentimentScores} using the
 * words under that node. Finally the leaf words are escaped with {@code escaper}
 * and each tregex/tsurgeon pattern pair of the enclosing class is applied to the root.
 *
 * @param parentPointers for each node index, the index of its parent; -1 marks the root
 * @param sentence the words of the sentence, in order
 * @param phraseIds maps a phrase (as a list of words) to its phrase id
 * @param sentimentScores maps a phrase id to its sentiment score
 * @param escaper used to escape the text of each leaf
 * @return the root of the converted tree after the tsurgeon operations
 * @throws RuntimeException if more than one root is found, or if a phrase id or a
 *         sentiment score cannot be found for some node
 */
public static Tree convertTree(List<Integer> parentPointers, List<String> sentence, Map<List<String>, Integer> phraseIds, Map<Integer, Double> sentimentScores, PTBEscapingProcessor escaper) {
    // The highest parent index determines how many nodes need to be allocated.
    int maxNode = 0;
    for (Integer parent : parentPointers) {
        maxNode = Math.max(maxNode, parent);
    }
    Tree[] subtrees = new Tree[maxNode + 1];
    // Nodes [0, sentence.size()) are preterminals: an (as yet unlabeled) node over a word leaf.
    for (int i = 0; i < sentence.size(); ++i) {
        CoreLabel word = new CoreLabel();
        word.setValue(sentence.get(i));
        Tree leaf = new LabeledScoredTreeNode(word);
        subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
        subtrees[i].addChild(leaf);
    }
    // The remaining nodes are internal phrase nodes.
    for (int i = sentence.size(); i <= maxNode; ++i) {
        subtrees[i] = new LabeledScoredTreeNode(new CoreLabel());
    }
    boolean[] connected = new boolean[maxNode + 1];
    Tree root = null;
    for (int index = 0; index < parentPointers.size(); ++index) {
        if (parentPointers.get(index) == -1) {
            if (root != null) {
                throw new RuntimeException("Found two roots for sentence " + sentence);
            }
            root = subtrees[index];
        } else {
            // Walk up the tree structure to make sure that leftmost
            // phrases are added first.  Otherwise, if the numbers are
            // inverted, we might get the right phrase added to a parent
            // first, resulting in "case zero in this", for example,
            // instead of "in this case zero"
            // Note that because we keep track of which ones are already
            // connected, we process this at most once per parent, so the
            // overall construction time is still efficient.
            connect(parentPointers, subtrees, connected, index);
        }
    }
    // NOTE(review): if no entry of parentPointers is -1, root is still null at
    // this point and is passed as-is to Tsurgeon below -- confirm callers
    // always supply exactly one root.
    for (int i = 0; i <= maxNode; ++i) {
        List<Tree> leaves = subtrees[i].getLeaves();
        List<String> words = CollectionUtils.transformAsList(leaves, TRANSFORM_TREE_TO_WORD);
        // First we look for a copy of the phrase with -LRB- -RRB-
        // instead of ().  The sentiment trees sometimes have both, and
        // the escaped versions seem to have more reasonable scores.
        // If a particular phrase doesn't have -LRB- -RRB- we fall back
        // to the unescaped versions.
        Integer phraseId = phraseIds.get(CollectionUtils.transformAsList(words, TRANSFORM_PARENS));
        if (phraseId == null) {
            phraseId = phraseIds.get(words);
        }
        if (phraseId == null) {
            // Report the phrase that actually failed the lookup, not just the
            // enclosing sentence, so the offending subtree can be identified.
            throw new RuntimeException("Could not find phrase id for phrase " + words + " in sentence " + sentence);
        }
        // TODO: should we make this an option?  Perhaps we want cases
        // where the trees have the phrase id and not their class
        Double score = sentimentScores.get(phraseId);
        if (score == null) {
            throw new RuntimeException("Could not find sentiment score for phrase id " + phraseId);
        }
        // TODO: make this a numClasses option
        // Bucket the score into classes 0..4; a score of exactly 1.0 would
        // otherwise land in a sixth bucket, hence the cap.
        int classLabel = Math.round((float) Math.floor(score * 5.0));
        if (classLabel > 4) {
            classLabel = 4;
        }
        subtrees[i].label().setValue(Integer.toString(classLabel));
    }
    // Escape the leaf text only after the phrase lookups above, which use the raw words.
    for (int i = 0; i < sentence.size(); ++i) {
        Tree leaf = subtrees[i].children()[0];
        leaf.label().setValue(escaper.escapeString(leaf.label().value()));
    }
    // Apply each tregex/tsurgeon pair in order; each application may replace the root.
    for (int i = 0; i < tregexPatterns.length; ++i) {
        root = Tsurgeon.processPattern(tregexPatterns[i], tsurgeonPatterns[i], root);
    }
    return root;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode)

Example 2 with LabeledScoredTreeNode

use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.

In the class ShiftReduceParser, method initialStateFromTaggedSentence:

/**
 * Creates the initial parser {@code State} from a sentence whose words carry tags.
 *
 * <p>For every word a two-level preterminal is built: a tag node whose only child
 * is the word node. Both labels are indexed from 1 and both carry head-word and
 * head-tag annotations pointing at the word label and the tag label. A word that
 * is already a {@code CoreLabel} is reused, so its index and head annotations are
 * overwritten in place.
 *
 * @param words the tagged words of the sentence
 * @return a state holding one preterminal per word
 * @throws IllegalArgumentException if a word is neither a {@code CoreLabel} nor a
 *         {@code HasTag}, or if its tag is null
 */
public static State initialStateFromTaggedSentence(List<? extends HasWord> words) {
    List<Tree> preterminals = Generics.newArrayList();
    int position = 0;
    for (HasWord token : words) {
        // Index from 1.  Tools downstream from the parser expect that
        position++;

        CoreLabel wordLabel;
        String tag;
        if (token instanceof CoreLabel) {
            // Reuse the caller's label rather than copying it.
            wordLabel = (CoreLabel) token;
            tag = wordLabel.tag();
        } else {
            wordLabel = new CoreLabel();
            wordLabel.setValue(token.word());
            wordLabel.setWord(token.word());
            if (!(token instanceof HasTag)) {
                throw new IllegalArgumentException("Expected tagged words");
            }
            tag = ((HasTag) token).tag();
            wordLabel.setTag(tag);
        }
        if (tag == null) {
            throw new IllegalArgumentException("Input word not tagged");
        }

        CoreLabel tagLabel = new CoreLabel();
        tagLabel.setValue(tag);

        // Internally this parser uses the index, so we have to
        // overwrite incorrect indices if the label is already indexed
        wordLabel.setIndex(position);
        tagLabel.setIndex(position);

        // TODO: can we get away with not setting these on the wordLabel?
        wordLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
        wordLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel);
        tagLabel.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, wordLabel);
        tagLabel.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, tagLabel);

        // Preterminal: tag node over word node.
        LabeledScoredTreeNode wordNode = new LabeledScoredTreeNode(wordLabel);
        LabeledScoredTreeNode tagNode = new LabeledScoredTreeNode(tagLabel);
        tagNode.addChild(wordNode);
        preterminals.add(tagNode);
    }
    return new State(preterminals);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) HasTag(edu.stanford.nlp.ling.HasTag) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations)

Example 3 with LabeledScoredTreeNode

use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.

In the class BinaryTransition, method apply:

/**
 * Replaces the two topmost stack entries with a new binary node over them.
 *
 * <p>The entry on top of the stack becomes the right child and the one beneath
 * it the left child. The new node is labeled with this transition's label and
 * copies its head-word and head-tag annotations from whichever child this
 * transition's side selects.
 *
 * @param state the state to transition from
 * @param scoreDelta the amount added to the state's score
 * @return a new state with the combined node on top of the stack and this
 *         transition appended to the transition history
 * @throws IllegalArgumentException if the side is not LEFT or RIGHT, or if the
 *         head child's label is not a {@code CoreLabel}
 */
public State apply(State state, double scoreDelta) {
    // Peel the top two trees off the stack: the right child first, then the left.
    Tree right = state.stack.peek();
    TreeShapedStack<Tree> belowRight = state.stack.pop();
    Tree left = belowRight.peek();
    TreeShapedStack<Tree> remainder = belowRight.pop();

    Tree head;
    switch(side) {
        case LEFT:
            head = left;
            break;
        case RIGHT:
            head = right;
            break;
        default:
            throw new IllegalArgumentException("Unknown side " + side);
    }
    if (!(head.label() instanceof CoreLabel)) {
        throw new IllegalArgumentException("Stack should have CoreLabel nodes");
    }
    CoreLabel headLabel = (CoreLabel) head.label();

    // The new node inherits its head information from the chosen child.
    CoreLabel production = new CoreLabel();
    production.setValue(label);
    production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
    production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, headLabel.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));

    Tree combined = new LabeledScoredTreeNode(production);
    combined.addChild(left);
    combined.addChild(right);

    return new State(remainder.push(combined), state.transitions.push(this), state.separators, state.sentence, state.tokenPosition, state.score + scoreDelta, false);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations)

Example 4 with LabeledScoredTreeNode

use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.

In the class UnaryTransition, method createNode:

/**
 * Creates a new node with the given label over the given children.
 *
 * <p>The new node's head-word and head-tag annotations are copied from the
 * label of {@code top}, which is cast to {@code CoreLabel} without a prior check.
 *
 * @param top the tree whose label supplies the head annotations
 * @param label the value for the new node's label
 * @param children the children to attach, in order
 * @return the newly created node
 */
static Tree createNode(Tree top, String label, Tree... children) {
    CoreLabel source = (CoreLabel) top.label();

    CoreLabel production = new CoreLabel();
    production.setValue(label);
    production.set(TreeCoreAnnotations.HeadWordLabelAnnotation.class, source.get(TreeCoreAnnotations.HeadWordLabelAnnotation.class));
    production.set(TreeCoreAnnotations.HeadTagLabelAnnotation.class, source.get(TreeCoreAnnotations.HeadTagLabelAnnotation.class));

    Tree parent = new LabeledScoredTreeNode(production);
    for (int i = 0; i < children.length; ++i) {
        parent.addChild(children[i]);
    }
    return parent;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations)

Example 5 with LabeledScoredTreeNode

use of edu.stanford.nlp.trees.LabeledScoredTreeNode in project CoreNLP by stanfordnlp.

In the class ProtobufAnnotationSerializer, method fromProto:

/**
 * Rebuilds a Tree from its protobuf form, recursing into every child.
 * This is not intended to be used on its own, but it is safe (lossless) to do so and therefore it is
 * left visible.
 *
 * <p>The label (with optional span and predicted sentiment class) is restored only
 * when the proto carries a value; the score is restored only when present.
 *
 * @param proto The serialized tree.
 * @return A Tree object corresponding to the saved tree. This will always be a {@link LabeledScoredTreeNode}.
 * @throws RuntimeInterruptedException if the current thread has been interrupted
 */
public Tree fromProto(CoreNLPProtos.ParseTree proto) {
    // Checked on every recursive call, so deserializing a large tree can be interrupted.
    if (Thread.interrupted()) {
        throw new RuntimeInterruptedException();
    }
    LabeledScoredTreeNode result = new LabeledScoredTreeNode();

    // Label, plus the annotations that live on it
    if (proto.hasValue()) {
        String text = proto.getValue();
        CoreLabel label = new CoreLabel();
        label.setCategory(text);
        label.setValue(text);
        result.setLabel(label);

        // The span is restored only when both endpoints were serialized
        boolean hasSpan = proto.hasYieldBeginIndex() && proto.hasYieldEndIndex();
        if (hasSpan) {
            label.set(SpanAnnotation.class, new IntPair(proto.getYieldBeginIndex(), proto.getYieldEndIndex()));
        }

        if (proto.hasSentiment()) {
            label.set(RNNCoreAnnotations.PredictedClass.class, proto.getSentiment().getNumber());
        }
    }

    // Score
    if (proto.hasScore()) {
        result.setScore(proto.getScore());
    }

    // Children, deserialized in order
    int childCount = proto.getChildCount();
    Tree[] kids = new LabeledScoredTreeNode[childCount];
    for (int k = 0; k < childCount; ++k) {
        kids[k] = fromProto(proto.getChild(k));
    }
    result.setChildren(kids);

    return result;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) RNNCoreAnnotations(edu.stanford.nlp.neural.rnn.RNNCoreAnnotations) LabeledScoredTreeNode(edu.stanford.nlp.trees.LabeledScoredTreeNode) Tree(edu.stanford.nlp.trees.Tree)

Aggregations

CoreLabel (edu.stanford.nlp.ling.CoreLabel)5 LabeledScoredTreeNode (edu.stanford.nlp.trees.LabeledScoredTreeNode)5 Tree (edu.stanford.nlp.trees.Tree)5 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)3 HasTag (edu.stanford.nlp.ling.HasTag)1 HasWord (edu.stanford.nlp.ling.HasWord)1 RNNCoreAnnotations (edu.stanford.nlp.neural.rnn.RNNCoreAnnotations)1