Search in sources :

Example 1 with HasCategory

Use of edu.stanford.nlp.ling.HasCategory in project CoreNLP by stanfordnlp.

The method transformTreeHelper of the class TreeAnnotator.

/**
 * Do the category splitting of the tree passed in.
 * This is initially called on the root node of a tree, and it recursively
 * calls itself on children.  A depth first left-to-right traversal is
 * done whereby a tree node's children are first transformed and then
 * the parent is transformed.  At the time of calling, the original root
 * always sits above the current node.  This routine can be assumed to,
 * and does, change the tree passed in: it destructively modifies tree nodes,
 * and makes new tree structure when it needs to.
 * <p>
 * Leaves are returned unchanged.  Preterminals (tags) and phrasal nodes get
 * a fresh label, built from the node's existing label, whose value is the
 * original category plus whatever annotations are enabled in
 * {@code trainOptions}; the relabelled node is then handed to
 * {@code tlpParams.transformTree} (skipped for tags when
 * {@code trainOptions.noTagSplit} is set).
 *
 * @param t The tree node to subcategorize.  May be {@code null}.
 * @param root The root of the tree.  It must contain {@code t} or
 *     this code will throw a NullPointerException.  If it is {@code null}
 *     or equal to {@code t}, the node is treated as having no parent.
 * @return The annotated tree, or {@code null} if {@code t} is {@code null}.
 * @throws NullPointerException if {@code root} is non-null, differs from
 *     {@code t}, and does not contain {@code t}.
 * @throws RuntimeException if the head finder yields no head (or a head
 *     without a label) for a phrasal node, or if the head's label carries
 *     no word or no tag.
 */
private Tree transformTreeHelper(Tree t, Tree root) {
    if (t == null) {
        // handle null
        return null;
    }
    if (t.isLeaf()) {
        // No need to change the label
        return t;
    }
    String cat = t.label().value();
    Tree parent;
    String parentStr;
    String grandParentStr;
    if (root == null || t.equals(root)) {
        // t is (treated as) the root: there is no parent context to annotate with
        parent = null;
        parentStr = "";
    } else {
        parent = t.parent(root);
        parentStr = parent.label().value();
    }
    if (parent == null || parent.equals(root)) {
        grandParentStr = "";
    } else {
        grandParentStr = parent.parent(root).label().value();
    }
    String baseParentStr = tlpParams.treebankLanguagePack().basicCategory(parentStr);
    String baseGrandParentStr = tlpParams.treebankLanguagePack().basicCategory(grandParentStr);
    if (t.isPreTerminal()) {
        // handle tags
        // recurse on the single child; root is passed as null, so the child
        // is processed without any parent context
        Tree childResult = transformTreeHelper(t.children()[0], null);
        // would be nicer if Word/CWT ??
        String word = childResult.value();
        if (!trainOptions.noTagSplit) {
            if (trainOptions.tagPA) {
                String test = cat + "^" + baseParentStr;
                if (!trainOptions.tagSelectiveSplit || trainOptions.splitters.contains(test)) {
                    cat = test;
                }
            }
            // parent is null when t itself is the root; guard against that,
            // as the markUnary == 2 case for phrasal nodes below also does
            if (trainOptions.markUnaryTags && parent != null && parent.numChildren() == 1) {
                cat = cat + "^U";
            }
        }
        // otherwise, leave the tags alone!
        // Copy the existing label so that any additional fields it carries are kept
        Label label = t.label().labelFactory().newLabel(t.label());
        label.setValue(cat);
        if (label instanceof HasCategory) {
            ((HasCategory) label).setCategory(cat);
        }
        if (label instanceof HasWord) {
            ((HasWord) label).setWord(word);
        }
        if (label instanceof HasTag) {
            ((HasTag) label).setTag(cat);
        }
        t.setLabel(label);
        // just in case word is changed
        t.setChild(0, childResult);
        if (trainOptions.noTagSplit) {
            return t;
        } else {
            // language-specific transforms
            return tlpParams.transformTree(t, root);
        }
    }
    // end isPreTerminal()
    // handle phrasal categories
    Tree[] kids = t.children();
    for (int childNum = 0; childNum < kids.length; childNum++) {
        Tree child = kids[childNum];
        // recursive call: children are annotated before their parent
        Tree childResult = transformTreeHelper(child, root);
        t.setChild(childNum, childResult);
    }
    Tree headChild = hf.determineHead(t);
    if (headChild == null || headChild.label() == null) {
        throw new RuntimeException("TreeAnnotator: null head found for tree [suggesting incomplete/wrong HeadFinder]:\n" + t);
    }
    Label headLabel = headChild.label();
    if (!(headLabel instanceof HasWord)) {
        throw new RuntimeException("TreeAnnotator: Head label lacks a Word annotation!");
    }
    if (!(headLabel instanceof HasTag)) {
        throw new RuntimeException("TreeAnnotator: Head label lacks a Tag annotation!");
    }
    // The phrasal node takes its word and tag from its head child
    String word = ((HasWord) headLabel).word();
    String tag = ((HasTag) headLabel).tag();
    String baseCat = tlpParams.treebankLanguagePack().basicCategory(cat);
    /* Sister annotation. Potential problem: if multiple sisters are
     * strong indicators for a single category's expansions.  This
     * happens concretely in the Chinese Treebank when NP (object)
     * has left sisters VV and AS.  Could lead to too much
     * sparseness.  The ideal solution would be to give the
     * splitting list an ordering, and take only the highest (~most
     * informative/reliable) sister annotation.
     */
    if (trainOptions.sisterAnnotate && !trainOptions.smoothing && baseParentStr.length() > 0) {
        List<String> leftSis = listBasicCategories(SisterAnnotationStats.leftSisterLabels(t, parent));
        List<String> rightSis = listBasicCategories(SisterAnnotationStats.rightSisterLabels(t, parent));
        List<String> leftAnn = new ArrayList<>();
        List<String> rightAnn = new ArrayList<>();
        for (String s : leftSis) {
            leftAnn.add(baseCat + "=l=" + tlpParams.treebankLanguagePack().basicCategory(s));
        }
        for (String s : rightSis) {
            rightAnn.add(baseCat + "=r=" + tlpParams.treebankLanguagePack().basicCategory(s));
        }
        for (String annCat : trainOptions.sisterSplitters) {
            if (leftAnn.contains(annCat) || rightAnn.contains(annCat)) {
                // Every entry of leftAnn/rightAnn starts with baseCat, so a
                // matching annCat does too: drop that prefix and append only
                // the "=l=..." / "=r=..." part.  substring() is used rather
                // than a regex so that categories containing regex
                // metacharacters are handled literally.
                cat = cat + annCat.substring(baseCat.length());
                // only the first matching splitter is applied
                break;
            }
        }
    }
    if (trainOptions.PA && !trainOptions.smoothing && baseParentStr.length() > 0) {
        String cat2 = baseCat + "^" + baseParentStr;
        if (!trainOptions.selectiveSplit || trainOptions.splitters.contains(cat2)) {
            cat = cat + "^" + baseParentStr;
        }
    }
    if (trainOptions.gPA && !trainOptions.smoothing && grandParentStr.length() > 0) {
        if (trainOptions.selectiveSplit) {
            String cat2 = baseCat + "^" + baseParentStr + "~" + baseGrandParentStr;
            // under selective splitting, grandparent marking is only added
            // when the category already contains a "^" annotation
            if (cat.contains("^") && trainOptions.splitters.contains(cat2)) {
                cat = cat + "~" + baseGrandParentStr;
            }
        } else {
            cat = cat + "~" + baseGrandParentStr;
        }
    }
    if (trainOptions.markUnary > 0) {
        if (trainOptions.markUnary == 1 && kids.length == 1 && kids[0].depth() >= 2) {
            // mode 1: this node has a single child of depth >= 2
            cat = cat + "-U";
        } else if (trainOptions.markUnary == 2 && parent != null && parent.numChildren() == 1 && t.depth() >= 2) {
            // mode 2: this node is the only child of its parent
            cat = cat + "-u";
        }
    }
    if (trainOptions.rightRec && rightRec(t, baseCat)) {
        cat = cat + "-R";
    }
    if (trainOptions.leftRec && leftRec(t, baseCat)) {
        cat = cat + "-L";
    }
    if (trainOptions.splitPrePreT && t.isPrePreTerminal()) {
        cat = cat + "-PPT";
    }
    // Copy the existing label so that any additional fields it carries are kept
    Label label = t.label().labelFactory().newLabel(t.label());
    label.setValue(cat);
    if (label instanceof HasCategory) {
        ((HasCategory) label).setCategory(cat);
    }
    if (label instanceof HasWord) {
        ((HasWord) label).setWord(word);
    }
    if (label instanceof HasTag) {
        ((HasTag) label).setTag(tag);
    }
    t.setLabel(label);
    // language-specific transforms
    return tlpParams.transformTree(t, root);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) Label(edu.stanford.nlp.ling.Label) ArrayList(java.util.ArrayList) HasCategory(edu.stanford.nlp.ling.HasCategory) HasTag(edu.stanford.nlp.ling.HasTag)

Aggregations

HasCategory (edu.stanford.nlp.ling.HasCategory)1 HasTag (edu.stanford.nlp.ling.HasTag)1 HasWord (edu.stanford.nlp.ling.HasWord)1 Label (edu.stanford.nlp.ling.Label)1 ArrayList (java.util.ArrayList)1