Search in sources :

Example 1 with Options

use of edu.stanford.nlp.parser.lexparser.Options in project CoreNLP by stanfordnlp.

the class CharacterLevelTagExtender method main.

/**
 * For testing -- CURRENTLY BROKEN!!!
 *
 * Trains a character-tag Chinese parser from the training file range of a
 * treebank (or, if training raises an {@link IllegalArgumentException},
 * loads a parser model from the path given as the second argument), writes
 * the trained parser to {@code chineseCharTagPCFG.ser.gz}, and then parses
 * the test file range, dumping gold and guessed trees to {@code out.chi}
 * in GB18030 encoding.
 *
 * @param args exactly three values: treebankPath trainNums testNums
 * @throws IOException if the test treebank or the output file cannot be accessed
 */
public static void main(String[] args) throws IOException {
    if (args.length != 3) {
        throw new RuntimeException("args: treebankPath trainNums testNums");
    }
    ChineseTreebankParserParams ctpp = new ChineseTreebankParserParams();
    ctpp.charTags = true;
    // TODO: these options are getting clobbered by reading in the
    // parser object (unless it's a text file parser?)
    Options op = new Options(ctpp);
    op.doDep = false;
    op.testOptions.maxLength = 90;
    LexicalizedParser lp;
    try {
        FileFilter trainFilt = new NumberRangesFileFilter(args[1], false);
        lp = LexicalizedParser.trainFromTreebank(args[0], trainFilt, op);
        try {
            String filename = "chineseCharTagPCFG.ser.gz";
            log.info("Writing parser in serialized format to file " + filename + " ");
            System.err.flush();
            // try-with-resources so the stream is released even if writeObject fails
            try (ObjectOutputStream out = IOUtils.writeStreamFromString(filename)) {
                out.writeObject(lp);
            }
            log.info("done.");
        } catch (IOException ioe) {
            // Failing to save the model is not fatal: evaluation can still run.
            ioe.printStackTrace();
        }
    } catch (IllegalArgumentException e) {
        // Fall back to treating the second argument as a model path.
        lp = LexicalizedParser.loadModel(args[1], op);
    }
    FileFilter testFilt = new NumberRangesFileFilter(args[2], false);
    MemoryTreebank testTreebank = ctpp.memoryTreebank();
    testTreebank.loadPath(new File(args[0]), testFilt);
    WordCatEquivalenceClasser eqclass = new WordCatEquivalenceClasser();
    WordCatEqualityChecker eqcheck = new WordCatEqualityChecker();
    EquivalenceClassEval eval = new EquivalenceClassEval(eqclass, eqcheck);
    //    System.out.println("Preterminals:" + preterminals);
    System.out.println("Testing...");
    // The writer is closed (and thereby flushed) when the loop finishes or fails.
    try (PrintWriter pw = new PrintWriter(new OutputStreamWriter(new FileOutputStream("out.chi"), "GB18030"), true)) {
        for (Tree gold : testTreebank) {
            Tree tree;
            try {
                tree = lp.parseTree(gold.yieldHasWord());
                if (tree == null) {
                    System.out.println("Failed to parse " + gold.yieldHasWord());
                    continue;
                }
            } catch (Exception e) {
                // Skip sentences the parser chokes on rather than aborting the run.
                e.printStackTrace();
                continue;
            }
            gold = gold.firstChild();
            pw.println(SentenceUtils.listToString(gold.preTerminalYield()));
            pw.println(SentenceUtils.listToString(gold.yield()));
            gold.pennPrint(pw);
            pw.println(tree.preTerminalYield());
            pw.println(tree.yield());
            tree.pennPrint(pw);
            //      Collection allBrackets = WordCatConstituent.allBrackets(tree);
            //      Collection goldBrackets = WordCatConstituent.allBrackets(gold);
            //      eval.eval(allBrackets, goldBrackets);
            eval.displayLast();
        }
    }
    System.out.println();
    System.out.println();
    eval.display();
}
Also used : Options(edu.stanford.nlp.parser.lexparser.Options) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) EquivalenceClassEval(edu.stanford.nlp.stats.EquivalenceClassEval) ChineseTreebankParserParams(edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams) NumberRangesFileFilter(edu.stanford.nlp.io.NumberRangesFileFilter)

Example 2 with Options

use of edu.stanford.nlp.parser.lexparser.Options in project CoreNLP by stanfordnlp.

the class ParserGrammar method loadTagger.

/**
 * Returns the tagger used to pre-tag input, or {@code null} when the
 * parser options do not request pre-tagging.
 *
 * The tagger is loaded by reflection and cached in {@code tagger}; it is
 * reloaded whenever the serialized tagger file named in the options
 * differs from the cached {@code taggerPath}.
 *
 * @return the cached or freshly loaded tagger, or {@code null} if
 *         {@code testOptions.preTag} is false
 */
public Function<List<? extends HasWord>, List<TaggedWord>> loadTagger() {
    Options op = getOp();
    if (!op.testOptions.preTag) {
        return null;
    }
    // TODO: rather coarse synchronization
    synchronized (this) {
        String requestedPath = op.testOptions.taggerSerializedFile;
        boolean alreadyLoaded = requestedPath.equals(taggerPath);
        if (!alreadyLoaded) {
            taggerPath = requestedPath;
            tagger = ReflectionLoading.loadByReflection("edu.stanford.nlp.tagger.maxent.MaxentTagger", taggerPath);
        }
        return tagger;
    }
}
Also used : Options(edu.stanford.nlp.parser.lexparser.Options)

Example 3 with Options

use of edu.stanford.nlp.parser.lexparser.Options in project CoreNLP by stanfordnlp.

the class ReorderingOracleTest method setUp.

/**
 * Prepares the test fixture: copies {@code correctTrees} into a memory
 * treebank created from default {@link Options} and stores the result of
 * binarizing that treebank in {@code binarizedTrees}.
 */
public void setUp() {
    final Options defaultOptions = new Options();
    final Treebank goldTreebank = defaultOptions.tlpParams.memoryTreebank();
    goldTreebank.addAll(Arrays.asList(correctTrees));
    binarizedTrees = ShiftReduceParser.binarizeTreebank(goldTreebank, defaultOptions);
}
Also used : Options(edu.stanford.nlp.parser.lexparser.Options) Treebank(edu.stanford.nlp.trees.Treebank)

Example 4 with Options

use of edu.stanford.nlp.parser.lexparser.Options in project CoreNLP by stanfordnlp.

the class CombineDVModels method main.

/**
 * Combines the DVModels embedded in several serialized parsers into one
 * parser with a {@code CombinedDVModelReranker}, saves it, and optionally
 * evaluates it on a test treebank.
 *
 * Recognized arguments:
 * <ul>
 *   <li>{@code -model path} - where the combined parser is saved</li>
 *   <li>{@code -baseModels p1 p2 ...} - parsers whose DVModels are combined;
 *       the list ends at the next argument starting with {@code -}</li>
 *   <li>{@code -testTreebank ...} - treebank description to evaluate on</li>
 * </ul>
 * All other arguments are passed to the options of the first base parser.
 *
 * @param args command line arguments as described above
 * @throws IOException declared by the original signature
 * @throws ClassNotFoundException declared by the original signature
 * @throws IllegalArgumentException if a required argument is missing or a
 *         base model does not carry a {@code DVModelReranker}
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
    String modelPath = null;
    List<String> baseModelPaths = null;
    String testTreebankPath = null;
    FileFilter testTreebankFilter = null;
    List<String> unusedArgs = new ArrayList<>();
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-model")) {
            // Guard against "-model" being the final argument.
            if (argIndex + 1 >= args.length) {
                throw new IllegalArgumentException("Found an argument -model with no path given");
            }
            modelPath = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-testTreebank")) {
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-testTreebank");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            testTreebankPath = treebankDescription.first();
            testTreebankFilter = treebankDescription.second();
        } else if (args[argIndex].equalsIgnoreCase("-baseModels")) {
            argIndex++;
            baseModelPaths = new ArrayList<>();
            // startsWith is safe for empty strings, unlike charAt(0)
            while (argIndex < args.length && !args[argIndex].startsWith("-")) {
                baseModelPaths.add(args[argIndex++]);
            }
            if (baseModelPaths.isEmpty()) {
                throw new IllegalArgumentException("Found an argument -baseModels with no actual models named");
            }
        } else {
            unusedArgs.add(args[argIndex++]);
        }
    }
    if (baseModelPaths == null) {
        throw new IllegalArgumentException("Need to specify -model to load an already prepared CombinedParser");
    }
    // Fail before the expensive model loading rather than when saving to a null path.
    if (modelPath == null) {
        throw new IllegalArgumentException("Need to specify -model with the path to save the combined parser to");
    }
    String[] newArgs = unusedArgs.toArray(new String[0]);
    LexicalizedParser underlyingParser = null;
    Options options = null;
    List<DVModel> dvparsers = new ArrayList<>();
    for (String baseModelPath : baseModelPaths) {
        log.info("Loading serialized DVParser from " + baseModelPath);
        LexicalizedParser dvparser = LexicalizedParser.loadModel(baseModelPath);
        Reranker reranker = dvparser.reranker;
        if (!(reranker instanceof DVModelReranker)) {
            throw new IllegalArgumentException("Expected parsers with DVModel embedded");
        }
        dvparsers.add(((DVModelReranker) reranker).getModel());
        // The first base parser supplies the underlying parser and its options.
        if (underlyingParser == null) {
            underlyingParser = dvparser;
            options = underlyingParser.getOp();
            // TODO: other parser's options?
            options.setOptions(newArgs);
        }
        log.info("... done");
    }
    LexicalizedParser combinedParser = LexicalizedParser.copyLexicalizedParser(underlyingParser);
    combinedParser.reranker = new CombinedDVModelReranker(options, dvparsers);
    combinedParser.saveParserToSerialized(modelPath);
    if (testTreebankPath != null) {
        log.info("Reading in trees from " + testTreebankPath);
        if (testTreebankFilter != null) {
            log.info("Filtering on " + testTreebankFilter);
        }
        Treebank testTreebank = combinedParser.getOp().tlpParams.memoryTreebank();
        testTreebank.loadPath(testTreebankPath, testTreebankFilter);
        log.info("Read in " + testTreebank.size() + " trees for testing");
        EvaluateTreebank evaluator = new EvaluateTreebank(combinedParser.getOp(), null, combinedParser);
        evaluator.testOnTreebank(testTreebank);
    }
}
Also used : Options(edu.stanford.nlp.parser.lexparser.Options) Reranker(edu.stanford.nlp.parser.lexparser.Reranker) EvaluateTreebank(edu.stanford.nlp.parser.lexparser.EvaluateTreebank) Treebank(edu.stanford.nlp.trees.Treebank) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) ArrayList(java.util.ArrayList) EvaluateTreebank(edu.stanford.nlp.parser.lexparser.EvaluateTreebank) FileFilter(java.io.FileFilter) Pair(edu.stanford.nlp.util.Pair)

Example 5 with Options

use of edu.stanford.nlp.parser.lexparser.Options in project CoreNLP by stanfordnlp.

the class UNKPrinter method main.

/**
 * Prints, in descending order of frequency, the unknown-word signatures
 * assigned to rare words in the later part of a treebank.
 *
 * Arguments: {@code -l language} selects the treebank parameters,
 * {@code -e encoding} sets the input/output encoding, and every other
 * argument is a treebank path to load. Encoding and language are applied
 * when the first treebank path is encountered, so options must precede it.
 *
 * A word counts as unknown when it occurs after the first half of the
 * trees and has been seen fewer than two times so far.
 *
 * @param args command line arguments as described above
 */
public static void main(String[] args) {
    if (args.length < minArgs) {
        System.out.println(usage.toString());
        System.exit(-1);
    }
    TreebankLangParserParams tlpp = new EnglishTreebankParserParams();
    DiskTreebank tb = null;
    String encoding = "UTF-8";
    Language lang = Language.English;
    for (int i = 0; i < args.length; i++) {
        if (args[i].startsWith("-")) {
            switch(args[i]) {
                case "-l":
                    // An option given as the last argument has no value to read.
                    if (i + 1 >= args.length) {
                        System.out.println(usage.toString());
                        System.exit(-1);
                    }
                    lang = Language.valueOf(args[++i].trim());
                    tlpp = lang.params;
                    break;
                case "-e":
                    if (i + 1 >= args.length) {
                        System.out.println(usage.toString());
                        System.exit(-1);
                    }
                    encoding = args[++i];
                    break;
                default:
                    System.out.println(usage.toString());
                    System.exit(-1);
            }
        } else {
            if (tb == null) {
                if (tlpp == null) {
                    System.out.println(usage.toString());
                    System.exit(-1);
                } else {
                    tlpp.setInputEncoding(encoding);
                    tlpp.setOutputEncoding(encoding);
                    tb = tlpp.diskTreebank();
                }
            }
            tb.loadPath(args[i]);
        }
    }
    // Without at least one treebank path there is nothing to scan.
    if (tb == null) {
        System.out.println(usage.toString());
        System.exit(-1);
    }
    PrintWriter pw = tlpp.pw();
    Options op = new Options();
    Options.LexOptions lexOptions = op.lexOptions;
    if (lang == Language.French) {
        lexOptions.useUnknownWordSignatures = 1;
        lexOptions.smartMutation = false;
        lexOptions.unknownSuffixSize = 2;
        lexOptions.unknownPrefixSize = 1;
    } else if (lang == Language.Arabic) {
        lexOptions.smartMutation = false;
        lexOptions.useUnknownWordSignatures = 9;
        lexOptions.unknownPrefixSize = 1;
        lexOptions.unknownSuffixSize = 1;
    }
    Index<String> wordIndex = new HashIndex<>();
    Index<String> tagIndex = new HashIndex<>();
    Lexicon lex = tlpp.lex(op, wordIndex, tagIndex);
    // Signatures are only collected once half of the trees have been seen.
    int computeAfter = (int) (0.50 * tb.size());
    Counter<String> vocab = new ClassicCounter<>();
    Counter<String> unkCounter = new ClassicCounter<>();
    int treeId = 0;
    for (Tree t : tb) {
        List<Label> yield = t.yield();
        int posId = 0;
        for (Label word : yield) {
            vocab.incrementCount(word.value());
            if (treeId > computeAfter && vocab.getCount(word.value()) < 2.0) {
                unkCounter.incrementCount(lex.getUnknownWordModel().getSignature(word.value(), posId));
            }
            // Advance for every word so posId is the word's index in the sentence,
            // not the number of unknown words seen so far.
            posId++;
        }
        treeId++;
    }
    List<String> biggestKeys = new ArrayList<>(unkCounter.keySet());
    Collections.sort(biggestKeys, Counters.toComparatorDescending(unkCounter));
    for (String wordType : biggestKeys) {
        pw.printf("%s\t%d%n", wordType, (int) unkCounter.getCount(wordType));
    }
    pw.close();
}
Also used : Options(edu.stanford.nlp.parser.lexparser.Options) DiskTreebank(edu.stanford.nlp.trees.DiskTreebank) Lexicon(edu.stanford.nlp.parser.lexparser.Lexicon) Label(edu.stanford.nlp.ling.Label) ArrayList(java.util.ArrayList) HashIndex(edu.stanford.nlp.util.HashIndex) TreebankLangParserParams(edu.stanford.nlp.parser.lexparser.TreebankLangParserParams) EnglishTreebankParserParams(edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams) Language(edu.stanford.nlp.international.Language) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) Tree(edu.stanford.nlp.trees.Tree) PrintWriter(java.io.PrintWriter)

Aggregations

Options (edu.stanford.nlp.parser.lexparser.Options)7 Tree (edu.stanford.nlp.trees.Tree)3 LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)2 Treebank (edu.stanford.nlp.trees.Treebank)2 ArrayList (java.util.ArrayList)2 Language (edu.stanford.nlp.international.Language)1 NumberRangesFileFilter (edu.stanford.nlp.io.NumberRangesFileFilter)1 Label (edu.stanford.nlp.ling.Label)1 BinaryHeadFinder (edu.stanford.nlp.parser.lexparser.BinaryHeadFinder)1 ChineseTreebankParserParams (edu.stanford.nlp.parser.lexparser.ChineseTreebankParserParams)1 EnglishTreebankParserParams (edu.stanford.nlp.parser.lexparser.EnglishTreebankParserParams)1 EvaluateTreebank (edu.stanford.nlp.parser.lexparser.EvaluateTreebank)1 Lexicon (edu.stanford.nlp.parser.lexparser.Lexicon)1 Reranker (edu.stanford.nlp.parser.lexparser.Reranker)1 TreebankLangParserParams (edu.stanford.nlp.parser.lexparser.TreebankLangParserParams)1 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)1 EquivalenceClassEval (edu.stanford.nlp.stats.EquivalenceClassEval)1 DiskTreebank (edu.stanford.nlp.trees.DiskTreebank)1 HeadFinder (edu.stanford.nlp.trees.HeadFinder)1 MemoryTreebank (edu.stanford.nlp.trees.MemoryTreebank)1