Search in sources :

Example 26 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class FrenchXMLTreeReader method getTreeFromXML.

/**
 * Recursively converts a DOM node into a {@link Tree}.
 *
 * <p>An element named {@code NODE_WORD} that contains no nested {@code NODE_WORD}
 * element is treated as a terminal. A terminal whose text yields more than one token
 * (a multi-word expression) becomes a flat {@code MISSING_PHRASAL} node with one
 * {@code MISSING_POS} preterminal per token; otherwise it becomes a single preterminal
 * labelled with the normalized POS. Any other element becomes an internal node whose
 * children are the converted child elements.
 *
 * @param root the DOM node to convert; it is cast to {@link Element}
 * @return the converted tree, or {@code null} when an internal node ends up with no children
 */
private Tree getTreeFromXML(Node root) {
    final Element element = (Element) root;
    final boolean isTerminal = element.getNodeName().equals(NODE_WORD)
            && element.getElementsByTagName(NODE_WORD).getLength() == 0;

    if (!isTerminal) {
        // Internal node: convert every child element, dropping those that produce no tree.
        List<Tree> children = new ArrayList<>();
        Node child = element.getFirstChild();
        while (child != null) {
            if (child.getNodeType() == Node.ELEMENT_NODE) {
                Tree subtree = getTreeFromXML(child);
                if (subtree != null) {
                    children.add(subtree);
                } else {
                    System.err.printf("%s: Discarding empty tree (root: %s)%n", this.getClass().getName(), child.getNodeName());
                }
            }
            child = child.getNextSibling();
        }

        // A "w" element that carries a POS attribute is a multi-word expression;
        // it is labelled with that attribute value instead of the element name.
        String label = element.getNodeName().trim();
        final boolean isMWE = label.equals("w") && element.hasAttribute(ATTR_POS);
        if (isMWE) {
            label = element.getAttribute(ATTR_POS).trim();
        }

        Tree node = null;
        if (kidsPresent(children)) {
            node = treeFactory.newTreeNode(treeNormalizer.normalizeNonterminal(label), children);
        }
        if (node != null && isMWE) {
            node = postProcessMWE(node);
        }
        return node;
    }

    // Terminal node: gather the annotations stored on the element.
    final String pos = treeNormalizer.normalizeNonterminal(getPOS(element));
    List<String> lemmas = getLemma(element);
    final String morph = getMorph(element);
    final List<String> tokens = getWordString(element.getTextContent().trim());
    final String subcat = getSubcat(element);

    if (lemmas != null && lemmas.size() != tokens.size()) {
        // This happens for a few poorly edited trees; assume the lemmas are
        // unreliable and drop them rather than misalign them with the tokens.
        log.info("Lemmas don't match tokens, ignoring lemmas: lemmas " + lemmas + ", tokens " + tokens);
        lemmas = null;
    }

    List<Tree> kids = new ArrayList<>();

    if (tokens.size() <= 1) {
        // Ordinary terminal: one leaf under a preterminal tagged with the POS.
        String word = treeNormalizer.normalizeTerminal(tokens.get(0));
        kids.add(newAnnotatedLeaf(word, lemmas, 0, morph, subcat));
        Tree preterminal = treeFactory.newTreeNode(pos, kids);
        if (preterminal.label() instanceof HasTag) {
            ((HasTag) preterminal.label()).setTag(pos);
        }
        return preterminal;
    }

    // Terminals can hold several tokens (MWEs); keep them as a flat structure for now.
    int position = 0;
    for (String token : tokens) {
        String word = treeNormalizer.normalizeTerminal(token);
        List<Tree> single = new ArrayList<>();
        single.add(newAnnotatedLeaf(word, lemmas, position, morph, subcat));
        Tree posNode = treeFactory.newTreeNode(MISSING_POS, single);
        if (posNode.label() instanceof HasTag) {
            ((HasTag) posNode.label()).setTag(MISSING_POS);
        }
        kids.add(posNode);
        position++;
    }
    return treeFactory.newTreeNode(MISSING_PHRASAL, kids);
}

/** Returns {@code true} when the given child list has at least one element. */
private static boolean kidsPresent(List<Tree> children) {
    return children.size() != 0;
}

/**
 * Creates a leaf for {@code word} and copies the word, lemma, morphology and
 * subcategorization onto its label, each only if the label type supports it.
 *
 * @param word       the already-normalized terminal string
 * @param lemmas     lemmas aligned with the tokens, or {@code null} to set no lemma
 * @param lemmaIndex index into {@code lemmas} for this leaf
 * @param morph      value stored as the label's original text
 * @param subcat     value stored as the label's category
 * @return the new leaf
 */
private Tree newAnnotatedLeaf(String word, List<String> lemmas, int lemmaIndex, String morph, String subcat) {
    Tree leaf = treeFactory.newLeaf(word);
    if (leaf.label() instanceof HasWord) {
        ((HasWord) leaf.label()).setWord(word);
    }
    if (leaf.label() instanceof CoreLabel && lemmas != null) {
        ((CoreLabel) leaf.label()).setLemma(lemmas.get(lemmaIndex));
    }
    if (leaf.label() instanceof HasContext) {
        ((HasContext) leaf.label()).setOriginalText(morph);
    }
    if (leaf.label() instanceof HasCategory) {
        ((HasCategory) leaf.label()).setCategory(subcat);
    }
    return leaf;
}
Also used : Element(org.w3c.dom.Element) Node(org.w3c.dom.Node) Tree(edu.stanford.nlp.trees.Tree)

Example 27 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class FrenchXMLTreeReader method main.

/**
 * For debugging. Reads every tree from each file named on the command line, prints each
 * tree to standard output prefixed with {@code <file name without extension>-<sentence id>},
 * and reports per-file and total tree counts on standard error.
 *
 * @param args paths of the tree files to read; at least one is required
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.printf("Usage: java %s tree_file(s)%n%n", FrenchXMLTreeReader.class.getName());
        System.exit(-1);
    }
    List<File> fileList = new ArrayList<>();
    for (String arg : args) {
        fileList.add(new File(arg));
    }
    TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
    int totalTrees = 0;
    // Collected for optional inspection of the observed morphological analyses (wsg2011);
    // nothing in this method prints the set.
    Set<String> morphAnalyses = Generics.newHashSet();
    try {
        for (File file : fileList) {
            // Strip the extension if there is one; lastIndexOf returns -1 for a name
            // without a dot, which must not be passed to substring.
            String fileName = file.getName();
            int dot = fileName.lastIndexOf('.');
            String canonicalFileName = (dot < 0) ? fileName : fileName.substring(0, dot);

            int numTrees = 0;
            // try-with-resources guarantees the underlying stream is released even if
            // constructing or using the tree reader fails.
            try (BufferedReader reader = new BufferedReader(new InputStreamReader(new FileInputStream(file), "UTF-8"))) {
                TreeReader tr = trf.newTreeReader(reader);
                try {
                    Tree t;
                    while ((t = tr.readTree()) != null) {
                        String ftbID = ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
                        System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, t.toString());
                        List<Label> leaves = t.yield();
                        for (Label label : leaves) {
                            if (label instanceof CoreLabel) {
                                morphAnalyses.add(((CoreLabel) label).originalText());
                            }
                        }
                        numTrees++;
                    }
                } finally {
                    tr.close();
                }
            }
            System.err.printf("%s: %d trees%n", file.getName(), numTrees);
            totalTrees += numTrees;
        }
        System.err.printf("%nRead %d trees%n", totalTrees);
    } catch (IOException e) {
        // Also covers FileNotFoundException and UnsupportedEncodingException.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 28 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class SplitMaker method main.

/**
 * Splits a tree file into development, training and test files.
 *
 * <p>Trees are read in order from the input file. The first 483 trees are written to
 * {@code <input>.clean.dev}, trees 483 through 5723 (zero-based) to
 * {@code <input>.clean.train}, and all remaining trees to {@code <input>.clean.test}.
 * Input and output use the encoding reported by the Hebrew treebank language pack.
 *
 * @param args exactly one argument: the path of the tree file to split
 */
public static void main(String[] args) {
    if (args.length != 1) {
        System.err.printf("Usage: java %s tree_file%n", SplitMaker.class.getName());
        System.exit(-1);
    }
    // Zero-based index of the first tree of the training and test sections.
    final int trainStart = 483;
    final int testStart = 5724;

    TreebankLanguagePack tlp = new HebrewTreebankLanguagePack();
    String inputFile = args[0];
    File treeFile = new File(inputFile);
    String encoding = tlp.getEncoding();

    // try-with-resources closes the reader and all three writers on every exit path,
    // including when reading a tree fails part-way through the file.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), encoding));
         PrintWriter pwDev = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.dev"), false, encoding));
         PrintWriter pwTrain = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.train"), false, encoding));
         PrintWriter pwTest = new PrintWriter(new PrintStream(new FileOutputStream(inputFile + ".clean.test"), false, encoding))) {
        TreeReaderFactory trf = new HebrewTreeReaderFactory();
        TreeReader tr = trf.newTreeReader(br);
        int numTrees = 0;
        try {
            for (Tree t; ((t = tr.readTree()) != null); numTrees++) {
                if (numTrees < trainStart) {
                    pwDev.println(t.toString());
                } else if (numTrees < testStart) {
                    pwTrain.println(t.toString());
                } else {
                    pwTest.println(t.toString());
                }
            }
        } finally {
            tr.close();
        }
        System.err.printf("Processed %d trees.%n", numTrees);
    } catch (IOException e) {
        // Also covers UnsupportedEncodingException and FileNotFoundException.
        e.printStackTrace();
    }
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) Tree(edu.stanford.nlp.trees.Tree) TreebankLanguagePack(edu.stanford.nlp.trees.TreebankLanguagePack) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Example 29 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class NegraHeadFinder method determineNonTrivialHead.

/**
 * Called by determineHead and may be overridden in subclasses
 * if special treatment is necessary for particular categories.
 *
 * <p>The mother category is taken from {@code basicCategory} applied to the label value
 * of {@code t}, with a leading {@code "@"} removed. The head rules registered for that
 * category in {@code nonTerminalInfo} are tried in order, the last one being applied as
 * the default; the first rule that yields a head wins. If the category has no rules,
 * {@code defaultRule} is used when it is set.
 *
 * @param t      the tree whose head child is sought
 * @param parent the parent of {@code t}; not used by this implementation
 * @return the head child, or {@code null} if no rule applies or none yields a head
 */
protected Tree determineNonTrivialHead(Tree t, Tree parent) {
    Tree theHead = null;
    String motherCat = basicCategory(t.label().value());
    if (motherCat.startsWith("@")) {
        motherCat = motherCat.substring(1);
    }
    if (DEBUG) {
        log.info("Looking for head of " + t.label() + "; value is |" + t.label().value() + "|, " + " baseCat is |" + motherCat + "|");
    }
    // We know we have nonterminals underneath
    // (a bit of a Penn Treebank assumption, but).
    //   Look at label.
    String[][] how = nonTerminalInfo.get(motherCat);
    if (how == null) {
        if (DEBUG) {
            // Guard against an empty category so that debug logging cannot throw.
            String firstChar = motherCat.isEmpty() ? "" : String.valueOf(motherCat.charAt(0));
            log.info("Warning: No rule found for " + motherCat + " (first char: " + firstChar + ")");
            log.info("Known nonterms are: " + nonTerminalInfo.keySet());
        }
        if (defaultRule != null) {
            if (DEBUG) {
                log.info("  Using defaultRule");
            }
            return traverseLocate(t.children(), defaultRule, true);
        } else {
            return null;
        }
    }
    for (int i = 0; i < how.length; i++) {
        // Only the last rule is applied as the default.
        boolean deflt = (i == how.length - 1);
        theHead = traverseLocate(t.children(), how[i], deflt);
        if (theHead != null) {
            break;
        }
    }
    if (DEBUG) {
        // theHead stays null when the rule list is empty or no rule matched;
        // do not dereference it just to log.
        log.info("  Chose " + (theHead == null ? null : theHead.label()));
    }
    return theHead;
}
Also used : Tree(edu.stanford.nlp.trees.Tree)

Example 30 with Tree

use of edu.stanford.nlp.trees.Tree in project CoreNLP by stanfordnlp.

the class SpanishXMLTreeReader method process.

/**
 * Read trees from the given file and output their processed forms to
 * standard output.
 *
 * <p>Each tree accepted by {@code shouldPrintTree} is printed as
 * {@code <file name without extension>-<sentence id>}, a tab, and the tree rendered by
 * {@code toString(t, plainPrint)}. A summary of how many trees were read and how many
 * were printed is written to standard error.
 *
 * @param file        the file the trees come from; only its name is used, for output prefixes
 * @param tr          the reader supplying the trees; it is not closed by this method
 * @param posPattern  passed to {@code shouldPrintTree} to select trees
 * @param wordPattern passed to {@code shouldPrintTree} to select trees
 * @param plainPrint  passed to {@code toString} to choose the output form
 * @throws IOException if reading a tree fails
 */
public static void process(File file, TreeReader tr, Pattern posPattern, Pattern wordPattern, boolean plainPrint) throws IOException {
    Tree t;
    int numTrees = 0, numTreesRetained = 0;
    // Strip the extension if there is one; lastIndexOf returns -1 for a name
    // without a dot, which must not be passed to substring.
    String fileName = file.getName();
    int dot = fileName.lastIndexOf('.');
    String canonicalFileName = (dot < 0) ? fileName : fileName.substring(0, dot);
    while ((t = tr.readTree()) != null) {
        numTrees++;
        if (!shouldPrintTree(t, posPattern, wordPattern)) {
            continue;
        }
        numTreesRetained++;
        String ftbID = ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
        String output = toString(t, plainPrint);
        System.out.printf("%s-%s\t%s%n", canonicalFileName, ftbID, output);
    }
    System.err.printf("%s: %d trees, %d matched and printed%n", file.getName(), numTrees, numTreesRetained);
}
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) Tree(edu.stanford.nlp.trees.Tree)

Aggregations

Tree (edu.stanford.nlp.trees.Tree)329 CoreLabel (edu.stanford.nlp.ling.CoreLabel)99 ArrayList (java.util.ArrayList)59 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)55 TreeCoreAnnotations (edu.stanford.nlp.trees.TreeCoreAnnotations)43 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)32 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)30 CoreMap (edu.stanford.nlp.util.CoreMap)27 List (java.util.List)27 Label (edu.stanford.nlp.ling.Label)24 SemanticGraph (edu.stanford.nlp.semgraph.SemanticGraph)21 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)20 TreeReader (edu.stanford.nlp.trees.TreeReader)19 PrintWriter (java.io.PrintWriter)19 Language (edu.stanford.nlp.international.Language)17 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)16 Treebank (edu.stanford.nlp.trees.Treebank)16 IOException (java.io.IOException)16 Mention (edu.stanford.nlp.coref.data.Mention)15 TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams)15