Search in sources :

Example 1 with ChineseTreebankParserParams

use of edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams in project CoreNLP by stanfordnlp.

The main method of the class CharacterLevelTagExtender.

/**
 * Test driver -- CURRENTLY BROKEN!!!
 *
 * <p>Trains a character-tag Chinese parser from a treebank (or loads an
 * existing model), parses a test section of the same treebank, and writes the
 * gold and guessed analyses to the file {@code out.chi} encoded as GB18030.
 * When training succeeds, the parser is also serialized to
 * {@code chineseCharTagPCFG.ser.gz}.
 *
 * @param args exactly three arguments: {@code treebankPath trainNums testNums}.
 *     {@code args[1]} is first tried as a file-number range for training; if
 *     that raises {@link IllegalArgumentException} it is passed to
 *     {@code LexicalizedParser.loadModel} instead.
 * @throws IOException if {@code out.chi} cannot be opened for writing, or if
 *     another I/O failure propagates from the calls made here
 * @throws RuntimeException if the number of arguments is not three
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        throw new RuntimeException("args: treebankPath trainNums testNums");
    }
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    ctpp.charTags = true;
    // TODO: these options are getting clobbered by reading in the
    // parser object (unless it's a text file parser?)
    Options op = new Options(ctpp);
    op.doDep = false;
    op.testOptions.maxLength = 90;
    LexicalizedParser lp;
    try {
        FileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
        lp = LexicalizedParser.trainFromTreebank(args[0], trainFilt, op);
        String filename = "chineseCharTagPCFG.ser.gz";
        log.info("Writing parser in serialized format to file " + filename + " ");
        System.err.flush();
        // Serialization is best-effort: a failure is reported but does not
        // stop the test run, since the trained parser is still in memory.
        try {
            // try-with-resources closes the stream even when writeObject fails.
            try (ObjectOutputStream out = IOUtils.writeStreamFromString(filename)) {
                out.writeObject(lp);
            }
            // Only reached once the stream has been written and closed cleanly.
            log.info("done.");
        } catch (IOException ioe) {
            ioe.printStackTrace();
        }
    } catch (IllegalArgumentException e) {
        // NOTE(review): presumably NumberRangesFileFilter rejects a non-range
        // string with IllegalArgumentException, so args[1] is then treated as
        // the path of a saved model -- confirm against NumberRangesFileFilter.
        lp = LexicalizedParser.loadModel(args[1], op);
    }
    FileFilter testFilt = new NumberRangesFileFilter(args[2], false);
    MemoryTreebank testTreebank = ctpp.memoryTreebank();
    testTreebank.loadPath(new File(args[0]), testFilt);
    // The writer is now closed (and therefore flushed) on every exit path;
    // previously it was never closed at all.
    try (PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true)) {
        WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
        WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
        EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck);
        //    System.out.println("Preterminals:" + preterminals);
        System.out.println("Testing...");
        for (Tree gold : testTreebank) {
            Tree tree;
            try {
                tree = lp.parseTree(gold.yieldHasWord());
                if (tree == null) {
                    System.out.println("Failed to parse " + gold.yieldHasWord());
                    continue;
                }
            } catch (Exception e) {
                // A failure on one sentence is reported and skipped so the
                // remaining test trees are still processed.
                e.printStackTrace();
                continue;
            }
            // Output is produced from the first child of the stored tree,
            // not from the stored tree's own root node.
            gold = gold.firstChild();
            pw.println(SentenceUtils.listToString(gold.preTerminalYield()));
            pw.println(SentenceUtils.listToString(gold.yield()));
            gold.pennPrint(pw);
            pw.println(tree.preTerminalYield());
            pw.println(tree.yield());
            tree.pennPrint(pw);
            // NOTE: the bracket evaluation below is disabled, so eval is never
            // fed any data before displayLast()/display() are called.
            //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
            //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
            //      eval.eval(allBrackets, goldBrackets);
            eval.displayLast();
        }
        System.out.println();
        System.out.println();
        eval.display();
    }
}
Also used : Options(edu.stanford.nlp.parser.lexparser.Options) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) EquivalenceClassEval(edu.stanford.nlp.stats.EquivalenceClassEval) ChineseTreebankParserParams(edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter)

Aggregations

NumberRangesFileFilter (edu.stanford.nlp.io.NumberRangesFileFilter): 1, ChineseTreebankParserParams (edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams): 1, LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser): 1, Options (edu.stanford.nlp.parser.lexparser.Options): 1, EquivalenceClassEval (edu.stanford.nlp.stats.EquivalenceClassEval): 1