Search in sources :

Example 1 with TreeFactory

use of edu.stanford.nlp.trees.TreeFactory in project CoreNLP by stanfordnlp.

In the class ManipulateTopBracket, method main:

/**
 * Command-line entry point: loads the treebank files named on the command line and prints
 * every tree with its top bracket (the language's start symbol) either removed or added.
 *
 * <p>Options read here: {@code l} selects the language (default English), {@code e} the
 * input/output encoding (default UTF-8), and {@code b} switches from adding the top bracket
 * to removing it. File paths are taken from the property stored under the empty key.
 *
 * @param args command-line arguments
 */
public static void main(String[] args) {
    if (args.length < minArgs) {
        System.out.println(usage());
        System.exit(-1);
    }
    Properties options = StringUtils.argsToProperties(args, argDefs());
    Language language = PropertiesUtils.get(options, "l", Language.English, Language.class);
    TreebankLangParserParams tlpp = language.params;
    // The encoding used to be read from "l", the same key as the language, so passing
    // "-l <lang>" silently turned the language name into the encoding.
    // NOTE(review): "e" is assumed to be the encoding flag -- confirm against usage()/argDefs().
    String encoding = options.getProperty("e", "UTF-8");
    boolean removeBracket = PropertiesUtils.getBool(options, "b", false);
    tlpp.setInputEncoding(encoding);
    tlpp.setOutputEncoding(encoding);
    DiskTreebank tb = tlpp.diskTreebank();

    // "".split("\\s+") yields a one-element array holding "", so testing the array length
    // can never detect "no files given"; test the raw string before splitting instead.
    String fileArgs = options.getProperty("", "").trim();
    if (fileArgs.isEmpty()) {
        log.info(usage());
        System.exit(-1);
    }
    for (String filename : fileArgs.split("\\s+")) {
        tb.loadPath(filename);
    }

    String startSymbol = tlpp.treebankLanguagePack().startSymbol();
    TreeFactory tf = new LabeledScoredTreeFactory();
    int nTrees = 0;
    // try-with-resources closes (and thereby flushes) the writer even if a tree fails to load.
    try (PrintWriter pwo = tlpp.pw()) {
        for (Tree t : tb) {
            if (removeBracket) {
                if (t.value().equals(startSymbol)) {
                    t = t.firstChild();
                }
            } else if (!t.value().equals(startSymbol)) {
                //Add a bracket if it isn't already there
                t = tf.newTreeNode(startSymbol, Collections.singletonList(t));
            }
            pwo.println(t.toString());
            nTrees++;
        }
    }
    System.err.printf("Processed %d trees.%n", nTrees);
}
Also used : DiskTreebank(edu.stanford.nlp.trees.DiskTreebank) Language(edu.stanford.nlp.international.Language) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) TreeFactory(edu.stanford.nlp.trees.TreeFactory) Tree(edu.stanford.nlp.trees.Tree) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) Properties(java.util.Properties) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) PrintWriter(java.io.PrintWriter)

Example 2 with TreeFactory

use of edu.stanford.nlp.trees.TreeFactory in project CoreNLP by stanfordnlp.

In the class MultiWordPreprocessor, method resolveDummyTags:

/**
 * Reads every tree from {@code treeFile} (decoded as UTF-8), fixes it with
 * {@code traverseAndFix}, expands multi-word phrases, optionally normalizes it, and writes
 * the result, one tree per line, to a sibling file named {@code treeFile + ".fixed"}.
 *
 * <p>I/O failures are reported with a stack trace and otherwise ignored; in that case the
 * "Processed ..." summary line is not printed.
 *
 * @param treeFile      file containing the trees to process
 * @param unigramTagger passed through to {@code traverseAndFix}
 * @param retainNER     passed through to {@code traverseAndFix}
 * @param tn            normalizer applied to each expanded tree; when {@code null} the
 *                      whole-tree normalization step is skipped
 */
private static void resolveDummyTags(File treeFile, TwoDimensionalCounter<String, String> unigramTagger, boolean retainNER, TreeNormalizer tn) {
    TreeFactory tf = new LabeledScoredTreeFactory();
    MultiWordTreeExpander expander = new MultiWordTreeExpander();
    TreeReaderFactory trf = new SpanishTreeReaderFactory();
    int nTrees = 0;
    // All three resources are closed in reverse order, even when reading or fixing a tree throws.
    // The writer is opened with PrintWriter(File, charsetName): wrapping a UTF-8 PrintStream in
    // PrintWriter(OutputStream) encodes characters with the platform default charset before the
    // stream ever sees them, so the output was not guaranteed to be UTF-8.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
         TreeReader tr = trf.newTreeReader(br);
         PrintWriter pw = new PrintWriter(new File(treeFile + ".fixed"), "UTF-8")) {
        for (Tree t; (t = tr.readTree()) != null; nTrees++) {
            traverseAndFix(t, null, unigramTagger, retainNER);
            // Now "decompress" further the expanded trees formed by
            // multiword token splitting
            t = expander.expandPhrases(t, tn, tf);
            if (tn != null) {
                t = tn.normalizeWholeTree(t, tf);
            }
            pw.println(t.toString());
        }
    } catch (IOException e) {
        // Covers UnsupportedEncodingException and FileNotFoundException too (both are IOExceptions).
        e.printStackTrace();
        return;
    }
    System.out.println("Processed " + nTrees + " trees");
}
Also used : TreeReader(edu.stanford.nlp.trees.TreeReader) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) TreeFactory(edu.stanford.nlp.trees.TreeFactory) Tree(edu.stanford.nlp.trees.Tree) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory)

Example 3 with TreeFactory

use of edu.stanford.nlp.trees.TreeFactory in project CoreNLP by stanfordnlp.

In the class ParserUtils, method xTree:

/**
 * Builds a flat fallback tree for a sentence that could not be parsed.
 *
 * <p>The root is labeled {@code X}. Each word becomes a leaf under its own preterminal,
 * which carries the word's tag when the word is a {@link HasTag} with a non-null tag,
 * and {@code XX} otherwise.
 *
 * @param words Words of the sentence that didn't parse
 * @return A tree rooted at X with one preterminal per word
 */
public static Tree xTree(List<? extends HasWord> words) {
    TreeFactory factory = new LabeledScoredTreeFactory();
    List<Tree> preterminals = new ArrayList<>();
    for (HasWord word : words) {
        // Use the word's own tag when it has one; otherwise fall back to the placeholder.
        String label = "XX";
        if (word instanceof HasTag) {
            String wordTag = ((HasTag) word).tag();
            if (wordTag != null) {
                label = wordTag;
            }
        }
        Tree leaf = factory.newLeaf(word.word());
        preterminals.add(factory.newTreeNode(label, Collections.singletonList(leaf)));
    }
    return factory.newTreeNode("X", preterminals);
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) TreeFactory(edu.stanford.nlp.trees.TreeFactory) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) HasTag(edu.stanford.nlp.ling.HasTag) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory)

Aggregations

LabeledScoredTreeFactory (edu.stanford.nlp.trees.LabeledScoredTreeFactory)3 Tree (edu.stanford.nlp.trees.Tree)3 TreeFactory (edu.stanford.nlp.trees.TreeFactory)3 Language (edu.stanford.nlp.international.Language)1 HasTag (edu.stanford.nlp.ling.HasTag)1 HasWord (edu.stanford.nlp.ling.HasWord)1 TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams)1 DiskTreebank (edu.stanford.nlp.trees.DiskTreebank)1 TreeReader (edu.stanford.nlp.trees.TreeReader)1 TreeReaderFactory (edu.stanford.nlp.trees.TreeReaderFactory)1 SpanishTreeReaderFactory (edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory)1 PrintWriter (java.io.PrintWriter)1 ArrayList (java.util.ArrayList)1 Properties (java.util.Properties)1