Search in sources :

Example 21 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class LexicalizedParserQuery method getBestPCFGParse.

/**
 * Returns the best PCFG parse after debinarization, or {@code null} when no
 * parse is available.
 *
 * @param stripSubcategories if true, the debinarized tree is additionally run
 *                           through {@code subcategoryStripper}
 * @return the transformed tree with its original words restored, or
 *         {@code null} if there is no PCFG parser, the parse was skipped or
 *         unparsable, or the parser has no best parse
 */
public Tree getBestPCFGParse(boolean stripSubcategories) {
    final boolean parseAvailable = pparser != null && !parseSkipped && !parseUnparsable;
    final Tree rawParse = parseAvailable ? pparser.getBestParse() : null;
    if (rawParse == null) {
        return null;
    }
    final Tree debinarized = debinarizer.transformTree(rawParse);
    final Tree result = stripSubcategories ? subcategoryStripper.transformTree(debinarized) : debinarized;
    restoreOriginalWords(result);
    return result;
}
Also used : Tree(edu.stanford.nlp.trees.Tree)

Example 22 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class Tdiff method main.

/**
 * Command-line entry point. Reads the first tree from each of two files,
 * diffs them with {@code markDiff}, prints the second tree in Penn format,
 * then prints each constituent of the first tree that the diff returned.
 *
 * @param args exactly two elements: the path of the first tree file and the
 *             path of the second tree file; otherwise a usage message is
 *             printed and nothing else happens
 */
public static void main(String[] args) {
    if (args.length != 2) {
        System.out.println("Usage: java Tdiff tree1 tree2");
        return;
    }
    File tree1Path = new File(args[0]);
    File tree2Path = new File(args[1]);
    TreeReaderFactory trf = new LabeledScoredTreeReaderFactory();
    // try-with-resources closes both underlying file streams on every exit path;
    // the original left them open.
    try (BufferedReader in1 = new BufferedReader(new FileReader(tree1Path));
         BufferedReader in2 = new BufferedReader(new FileReader(tree2Path))) {
        TreeReader tR1 = trf.newTreeReader(in1);
        TreeReader tR2 = trf.newTreeReader(in2);
        Tree t1 = tR1.readTree();
        Tree t2 = tR2.readTree();
        Set<Constituent> t1Diff = markDiff(t1, t2);
        // markDiff accepts a null t2 (no tree could be read), so do not dereference it blindly here.
        if (t2 != null) {
            System.out.println(t2.pennString());
        }
        System.out.println();
        for (Constituent c : t1Diff) {
            System.out.println(c);
        }
    } catch (FileNotFoundException e) {
        log.info("File not found!");
    } catch (IOException e) {
        log.info("Unable to read file!");
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) LabeledScoredTreeReaderFactory(edu.stanford.nlp.trees.LabeledScoredTreeReaderFactory) Constituent(edu.stanford.nlp.trees.Constituent) LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent)

Example 23 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class Tdiff method markDiff.

/**
 * Compares the bracketings of two trees.
 * <p>
 * Every phrasal subtree of {@code t2} gets its {@code DoAnnotation} set:
 * {@code true} when its constituent is not among those of {@code t1},
 * {@code false} when it is. Constituents of {@code t1} that are matched this
 * way are removed from the result, so what remains are the brackets of
 * {@code t1} that are not in {@code t2}.
 *
 * @param t1 the reference tree; may be null, in which case it contributes no constituents
 * @param t2 the tree whose phrasal nodes are annotated; may be null, in which case nothing is annotated
 * @return the set of brackets in t1 not in t2
 */
public static Set<Constituent> markDiff(Tree t1, Tree t2) {
    final Set<Constituent> unmatched;
    if (t1 == null) {
        unmatched = Generics.<Constituent>newHashSet();
    } else {
        unmatched = t1.constituents(cf);
    }
    if (t2 == null) {
        return unmatched;
    }
    t2.setSpans();
    for (Tree node : t2) {
        if (!node.isPhrasal()) {
            continue;
        }
        IntPair bounds = node.getSpan();
        Constituent bracket = cf.newConstituent(bounds.getSource(), bounds.getTarget(), node.label(), 0.0);
        // remove() reports whether the bracket was present, i.e. whether t1 has it too
        boolean alsoInT1 = unmatched.remove(bracket);
        CoreLabel nodeLabel = (CoreLabel) node.label();
        nodeLabel.set(CoreAnnotations.DoAnnotation.class, !alsoInT1);
    }
    return unmatched;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree) IntPair(edu.stanford.nlp.util.IntPair) Constituent(edu.stanford.nlp.trees.Constituent) LabeledConstituent(edu.stanford.nlp.trees.LabeledConstituent)

Example 24 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class ArabicTreeNormalizer method normalizeWholeTree.

/**
 * Normalizes a complete tree in several passes:
 * <ol>
 *   <li>prunes with {@code emptyFilter} and splices out with {@code aOverAFilter};</li>
 *   <li>walks every node: leaves carrying {@code MORPHO_MARK} are split into word and
 *       morphological analysis, preterminals copy their value into the tag of a
 *       {@link HasTag} label, and other nodes get a "DUMMYTAG" parent spliced above any
 *       child that is a bare leaf;</li>
 *   <li>if {@code markPRDverb} is set, relabels matches of {@code prdVerbPattern};</li>
 *   <li>if {@code retainNPSbj} is set, relabels matches of {@code npSbjPattern} as "NP";</li>
 *   <li>wraps a bare tagged conjunction/punctuation tree in "FRAG";</li>
 *   <li>skips empty-valued wrapper nodes and ensures the result is rooted at {@code rootLabel}.</li>
 * </ol>
 * Problems found along the way are reported on {@code System.err}; none of them abort
 * the normalization.
 *
 * @param tree the tree to normalize
 * @param tf   factory used for the pruning/splicing calls and to build the new
 *             "DUMMYTAG", "FRAG" and root nodes
 * @return the normalized tree; the code guards against it becoming {@code null}
 *         while skipping empty wrapper nodes, so callers should expect {@code null}
 */
@Override
public Tree normalizeWholeTree(Tree tree, TreeFactory tf) {
    tree = tree.prune(emptyFilter, tf).spliceOut(aOverAFilter, tf);
    for (Tree t : tree) {
        if (t.isLeaf()) {
            // Strip the morphological analysis off the word and store it as the label's
            // original text (the source comment was cut off here; it ended with
            // "specified by HasContext.").
            if (t.value().contains(MorphoFeatureSpecification.MORPHO_MARK)) {
                String[] toks = t.value().split(MorphoFeatureSpecification.MORPHO_MARK);
                // Exactly two parts are expected: toks[0] is used as the word, toks[1] as the analysis.
                if (toks.length != 2)
                    System.err.printf("%s: Word contains malformed morph annotation: %s%n", this.getClass().getName(), t.value());
                else if (t.label() instanceof CoreLabel) {
                    ((CoreLabel) t.label()).setValue(toks[0].trim().intern());
                    ((CoreLabel) t.label()).setWord(toks[0].trim().intern());
                    Pair<String, String> lemmaMorph = MorphoFeatureSpecification.splitMorphString(toks[0], toks[1]);
                    String lemma = lemmaMorph.first();
                    String morphAnalysis = lemmaMorph.second();
                    if (lemma.equals(toks[0])) {
                        // Lemma equals the word: store the raw analysis string unchanged.
                        ((CoreLabel) t.label()).setOriginalText(toks[1].trim().intern());
                    } else {
                        // TODO(speneg): Does this help?
                        String newLemma = lexMapper.map(null, lemma);
                        // Fall back to the unmapped lemma when the mapper returns nothing usable.
                        if (newLemma == null || newLemma.trim().length() == 0) {
                            newLemma = lemma;
                        }
                        String newMorphAnalysis = newLemma + MorphoFeatureSpecification.LEMMA_MARK + morphAnalysis;
                        ((CoreLabel) t.label()).setOriginalText(newMorphAnalysis.intern());
                    }
                } else {
                    System.err.printf("%s: Cannot store morph analysis in non-CoreLabel: %s%n", this.getClass().getName(), t.label().getClass().getName());
                }
            }
        } else if (t.isPreTerminal()) {
            // Preterminals: report a missing tag, otherwise copy the node value into the label's tag.
            if (t.value() == null || t.value().equals("")) {
                System.err.printf("%s: missing tag for\n%s\n", this.getClass().getName(), t.pennString());
            } else if (t.label() instanceof HasTag) {
                ((HasTag) t.label()).setTag(t.value());
            }
        } else {
            //Phrasal nodes
            // there are some nodes "/" missing preterminals.  We'll splice in a tag for these.
            int nk = t.numChildren();
            List<Tree> newKids = new ArrayList<>(nk);
            for (int j = 0; j < nk; j++) {
                Tree child = t.getChild(j);
                if (child.isLeaf()) {
                    System.err.printf("%s: Splicing in DUMMYTAG for%n%s%n", this.getClass().getName(), t.toString());
                    newKids.add(tf.newTreeNode("DUMMYTAG", Collections.singletonList(child)));
                } else {
                    newKids.add(child);
                }
            }
            // NOTE(review): this replaces t's children while the enclosing loop is iterating
            // over the tree; confirm the Tree iterator tolerates that.
            t.setChildren(newKids);
        }
    }
    // special global coding for moving PRD annotation from constituent to verb tag.
    if (markPRDverb) {
        TregexMatcher m = prdVerbPattern.matcher(tree);
        Tree match = null;
        while (m.find()) {
            // Only act when the matched node differs from the previous one, so consecutive
            // matches on the same node append the suffix once.
            if (m.getMatch() != match) {
                match = m.getMatch();
                match.label().setValue(match.label().value() + "-PRDverb");
                Tree prd = m.getNode("prd");
                prd.label().setValue(super.normalizeNonterminal(prd.label().value()));
            }
        }
    }
    //Mark *only* subjects in verb-initial clauses
    if (retainNPSbj) {
        TregexMatcher m = npSbjPattern.matcher(tree);
        while (m.find()) {
            Tree match = m.getMatch();
            match.label().setValue("NP");
        }
    }
    if (tree.isPreTerminal()) {
        // The whole tree is a bare tag: bad!
        String val = tree.label().value();
        if (val.equals("CC") || val.startsWith("PUNC") || val.equals("CONJ")) {
            System.err.printf("%s: Bare tagged word being wrapped in FRAG\n%s\n", this.getClass().getName(), tree.pennString());
            tree = tf.newTreeNode("FRAG", Collections.singletonList(tree));
        } else {
            System.err.printf("%s: Bare tagged word\n%s\n", this.getClass().getName(), tree.pennString());
        }
    }
    // Descend past wrapper nodes that have a null/empty value and at most one child.
    // (The source comment was cut off here; presumably when the tree consists only of
    // such wrappers, firstChild() eventually yields null and this method
    //will return null. In this case, readers e.g. PennTreeReader will try to read the next tree.
    // -- TODO confirm against the Tree.firstChild() contract.)
    while (tree != null && (tree.value() == null || tree.value().equals("")) && tree.numChildren() <= 1) tree = tree.firstChild();
    // Make sure whatever remains is rooted at rootLabel.
    if (tree != null && !tree.value().equals(rootLabel))
        tree = tf.newTreeNode(rootLabel, Collections.singletonList(tree));
    return tree;
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) Tree(edu.stanford.nlp.trees.Tree) HasTag(edu.stanford.nlp.ling.HasTag) ArrayList(java.util.ArrayList) List(java.util.List) TregexMatcher(edu.stanford.nlp.trees.tregex.TregexMatcher) Pair(edu.stanford.nlp.util.Pair)

Example 25 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class FrenchXMLTreeReader method readTree.

/**
 * Reads the next tree from the parsed XML sentences.
 * <p>
 * Sentences are consumed in order, starting at {@code sentIdx}, until one
 * yields a non-null tree or the sentences are exhausted. Each tree is passed
 * through {@code treeNormalizer}; if its root label is a {@link CoreLabel},
 * the sentence's {@code ATTR_NUMBER} attribute is stored on it as the
 * sentence id.
 *
 * @return the next normalized tree, or {@code null} when there are no
 *         sentences left (or none were loaded)
 */
public Tree readTree() {
    Tree t = null;
    while (t == null && sentences != null && sentIdx < sentences.getLength()) {
        Node sentRoot = sentences.item(sentIdx++);
        t = getTreeFromXML(sentRoot);
        if (t != null) {
            t = treeNormalizer.normalizeWholeTree(t, treeFactory);
            // Normalization may discard the whole tree and return null; in that case
            // skip the annotation and let the loop move on to the next sentence
            // instead of failing with a NullPointerException.
            if (t != null && t.label() instanceof CoreLabel) {
                String ftbId = ((Element) sentRoot).getAttribute(ATTR_NUMBER);
                ((CoreLabel) t.label()).set(CoreAnnotations.SentenceIDAnnotation.class, ftbId);
            }
        }
    }
    return t;
}
Also used : Node(org.w3c.dom.Node) Element(org.w3c.dom.Element) Tree(edu.stanford.nlp.trees.Tree)

Aggregations

Tree (edu.stanford.nlp.trees.Tree): 329; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 99; ArrayList (java.util.ArrayList): 59; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 55; TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations): 43; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 32; ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint): 30; CoreMap (edu.stanford.nlp.util.CoreMap): 27; List (java.util.List): 27; Label (edu.stanford.nlp.ling.Label): 24; SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph): 21; TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory): 20; TreeReader (edu.stanford.nlp.trees.TreeReader): 19; PrintWriter (java.io.PrintWriter): 19; Language (edu.stanford.nlp.international.Language): 17; TreeTransformer (edu.stanford.nlp.trees.TreeTransformer): 16; Treebank (edu.stanford.nlp.trees.Treebank): 16; IOException (java.io.IOException): 16; Mention (edu.stanford.nlp.coref.data.Mention): 15; TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams): 15