
Example 16 with LexicalizedParser

Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

From the class CombineDVModels, method main:

public static void main(String[] args) throws IOException, ClassNotFoundException {
    String modelPath = null;
    List<String> baseModelPaths = null;
    String testTreebankPath = null;
    FileFilter testTreebankFilter = null;
    List<String> unusedArgs = new ArrayList<>();
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-model")) {
            modelPath = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-testTreebank")) {
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-testTreebank");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            testTreebankPath = treebankDescription.first();
            testTreebankFilter = treebankDescription.second();
        } else if (args[argIndex].equalsIgnoreCase("-baseModels")) {
            argIndex++;
            baseModelPaths = new ArrayList<>();
            while (argIndex < args.length && args[argIndex].charAt(0) != '-') {
                baseModelPaths.add(args[argIndex++]);
            }
            if (baseModelPaths.size() == 0) {
                throw new IllegalArgumentException("Found an argument -baseModels with no actual models named");
            }
        } else {
            unusedArgs.add(args[argIndex++]);
        }
    }
    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);
    LexicalizedParser underlyingParser = null;
    Options options = null;
    LexicalizedParser combinedParser = null;
    if (baseModelPaths != null) {
        List<DVModel> dvparsers = new ArrayList<>();
        for (String baseModelPath : baseModelPaths) {
            log.info("Loading serialized DVParser from " + baseModelPath);
            LexicalizedParser dvparser = LexicalizedParser.loadModel(baseModelPath);
            Reranker reranker = dvparser.reranker;
            if (!(reranker instanceof DVModelReranker)) {
                throw new IllegalArgumentException("Expected parsers with DVModel embedded");
            }
            dvparsers.add(((DVModelReranker) reranker).getModel());
            if (underlyingParser == null) {
                underlyingParser = dvparser;
                options = underlyingParser.getOp();
                // TODO: other parser's options?
                options.setOptions(newArgs);
            }
            log.info("... done");
        }
        combinedParser = LexicalizedParser.copyLexicalizedParser(underlyingParser);
        CombinedDVModelReranker reranker = new CombinedDVModelReranker(options, dvparsers);
        combinedParser.reranker = reranker;
        combinedParser.saveParserToSerialized(modelPath);
    } else {
        throw new IllegalArgumentException("Need to specify -model to load an already prepared CombinedParser");
    }
    Treebank testTreebank = null;
    if (testTreebankPath != null) {
        log.info("Reading in trees from " + testTreebankPath);
        if (testTreebankFilter != null) {
            log.info("Filtering on " + testTreebankFilter);
        }
        testTreebank = combinedParser.getOp().tlpParams.memoryTreebank();
        testTreebank.loadPath(testTreebankPath, testTreebankFilter);
        log.info("Read in " + testTreebank.size() + " trees for testing");
        EvaluateTreebank evaluator = new EvaluateTreebank(combinedParser.getOp(), null, combinedParser);
        evaluator.testOnTreebank(testTreebank);
    }
}
Also used: Options (edu.stanford.nlp.parser.lexparser.Options), Reranker (edu.stanford.nlp.parser.lexparser.Reranker), EvaluateTreebank (edu.stanford.nlp.parser.lexparser.EvaluateTreebank), Treebank (edu.stanford.nlp.trees.Treebank), LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser), ArrayList (java.util.ArrayList), FileFilter (java.io.FileFilter), Pair (edu.stanford.nlp.util.Pair)
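
For reference, a minimal usage sketch (not part of the CoreNLP sources) of loading the combined model that CombineDVModels writes out and parsing one sentence with it. The model path is a hypothetical placeholder for whatever was passed as -model; the calls themselves (loadModel, parse, pennPrint) are the same ones used in the other examples.

import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;

public class CombinedModelUsageSketch {
    public static void main(String[] args) {
        // Hypothetical path: wherever CombineDVModels saved the combined parser via -model
        String combinedModelPath = "combined-dvparser.ser.gz";
        LexicalizedParser parser = LexicalizedParser.loadModel(combinedModelPath);
        // parse(String) tokenizes with the default tokenizer for the parser's language pack
        Tree tree = parser.parse("The combined reranker rescores the candidate parses.");
        tree.pennPrint();
    }
}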

Example 17 with LexicalizedParser

Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

From the class PrintTagList, method main:

public static void main(String[] args) {
    String parserFile = null;
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-model")) {
            parserFile = args[argIndex + 1];
            argIndex += 2;
        } else {
            String error = "Unknown argument " + args[argIndex];
            log.info(error);
            throw new RuntimeException(error);
        }
    }
    if (parserFile == null) {
        log.info("Must specify a model file with -model");
        System.exit(2);
    }
    LexicalizedParser parser = LexicalizedParser.loadModel(parserFile);
    Set<String> tags = Generics.newTreeSet();
    for (String tag : parser.tagIndex) {
        tags.add(parser.treebankLanguagePack().basicCategory(tag));
    }
    System.out.println("Basic tags: " + tags.size());
    for (String tag : tags) {
        System.out.print("  " + tag);
    }
    System.out.println();
    System.out.println("All tags size: " + parser.tagIndex.size());
    Set<String> states = Generics.newTreeSet();
    for (String state : parser.stateIndex) {
        states.add(parser.treebankLanguagePack().basicCategory(state));
    }
    System.out.println("Basic states: " + states.size());
    for (String tag : states) {
        System.out.print("  " + tag);
    }
    System.out.println();
    System.out.println("All states size: " + parser.stateIndex.size());
    System.out.println("Unary grammar size: " + parser.ug.numRules());
    System.out.println("Binary grammar size: " + parser.bg.numRules());
}
Also used: LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)
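
A small hedged variation on the same idea: instead of printing every tag, the tag index of a loaded parser can be queried for a single tag. This sketch assumes the Index API's indexOf (which returns -1 for unknown entries) and reuses the englishPCFG model path from the ParserDemo example below; it is an illustration, not part of PrintTagList.

import edu.stanford.nlp.parser.lexparser.LexicalizedParser;

public class TagLookupSketch {
    public static void main(String[] args) {
        LexicalizedParser parser = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        // tagIndex is an Index<String>; indexOf returns -1 if the tag is not in the grammar
        System.out.println("NN known: " + (parser.tagIndex.indexOf("NN") >= 0));
        System.out.println("Number of tags: " + parser.tagIndex.size());
    }
}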

Example 18 with LexicalizedParser

Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

From the class ParserDemo, method main:

/**
   * The main method demonstrates the easiest way to load a parser.
   * Simply call loadModel and specify the path of a serialized grammar
   * model, which can be a file, a resource on the classpath, or even a URL.
   * For example, this demonstrates loading a grammar from the models jar
   * file, which you therefore need to include on the classpath for ParserDemo
   * to work.
   *
   * Usage: {@code java ParserDemo [[model] textFile]}
   * e.g.: java ParserDemo edu/stanford/nlp/models/lexparser/chineseFactored.ser.gz data/chinese-onesent-utf8.txt
   *
   */
public static void main(String[] args) {
    String parserModel = "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    if (args.length > 0) {
        parserModel = args[0];
    }
    LexicalizedParser lp = LexicalizedParser.loadModel(parserModel);
    if (args.length == 0) {
        demoAPI(lp);
    } else {
        String textFile = (args.length > 1) ? args[1] : args[0];
        demoDP(lp, textFile);
    }
}
Also used: LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)
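
The javadoc above notes that loadModel accepts a file, a classpath resource, or a URL. A minimal sketch of the file-path case follows; the path is a hypothetical placeholder, and the rest mirrors ParserDemo.

import edu.stanford.nlp.parser.lexparser.LexicalizedParser;

public class LoadFromFileSketch {
    public static void main(String[] args) {
        // Hypothetical file-system path to a serialized grammar; a classpath resource
        // (as in ParserDemo) or a URL string would be loaded the same way.
        String modelFile = "/path/to/englishPCFG.ser.gz";
        LexicalizedParser lp = LexicalizedParser.loadModel(modelFile);
        lp.parse("Loading from a plain file works the same way.").pennPrint();
    }
}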

Example 19 with LexicalizedParser

Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

From the class ParserDemo2, method main:

/** This example shows a few more ways of providing input to a parser.
   *
   *  Usage: ParserDemo2 [grammar [textFile]]
   */
public static void main(String[] args) throws IOException {
    String grammar = args.length > 0 ? args[0] : "edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz";
    String[] options = { "-maxLength", "80", "-retainTmpSubcategories" };
    LexicalizedParser lp = LexicalizedParser.loadModel(grammar, options);
    TreebankLanguagePack tlp = lp.getOp().langpack();
    GrammaticalStructureFactory gsf = tlp.grammaticalStructureFactory();
    Iterable<List<? extends HasWord>> sentences;
    if (args.length > 1) {
        DocumentPreprocessor dp = new DocumentPreprocessor(args[1]);
        List<List<? extends HasWord>> tmp = new ArrayList<>();
        for (List<HasWord> sentence : dp) {
            tmp.add(sentence);
        }
        sentences = tmp;
    } else {
        // Showing tokenization and parsing in code a couple of different ways.
        String[] sent = { "This", "is", "an", "easy", "sentence", "." };
        List<HasWord> sentence = new ArrayList<>();
        for (String word : sent) {
            sentence.add(new Word(word));
        }
        String sent2 = ("This is a slightly longer and more complex " + "sentence requiring tokenization.");
        // Use the default tokenizer for this TreebankLanguagePack
        Tokenizer<? extends HasWord> toke = tlp.getTokenizerFactory().getTokenizer(new StringReader(sent2));
        List<? extends HasWord> sentence2 = toke.tokenize();
        String[] sent3 = { "It", "can", "can", "it", "." };
        // Parser gets second "can" wrong without help
        String[] tag3 = { "PRP", "MD", "VB", "PRP", "." };
        List<TaggedWord> sentence3 = new ArrayList<>();
        for (int i = 0; i < sent3.length; i++) {
            sentence3.add(new TaggedWord(sent3[i], tag3[i]));
        }
        Tree parse = lp.parse(sentence3);
        parse.pennPrint();
        List<List<? extends HasWord>> tmp = new ArrayList<>();
        tmp.add(sentence);
        tmp.add(sentence2);
        tmp.add(sentence3);
        sentences = tmp;
    }
    for (List<? extends HasWord> sentence : sentences) {
        Tree parse = lp.parse(sentence);
        parse.pennPrint();
        System.out.println();
        GrammaticalStructure gs = gsf.newGrammaticalStructure(parse);
        List<TypedDependency> tdl = gs.typedDependenciesCCprocessed();
        System.out.println(tdl);
        System.out.println();
        System.out.println("The words of the sentence:");
        for (Label lab : parse.yield()) {
            if (lab instanceof CoreLabel) {
                System.out.println(((CoreLabel) lab).toString(CoreLabel.OutputFormat.VALUE_MAP));
            } else {
                System.out.println(lab);
            }
        }
        System.out.println();
        System.out.println(parse.taggedYield());
        System.out.println();
    }
    // This method turns the String into a single sentence using the
    // default tokenizer for the TreebankLanguagePack.
    String sent3 = "This is one last test!";
    lp.parse(sent3).pennPrint();
}
Also used: Word (edu.stanford.nlp.ling.Word), HasWord (edu.stanford.nlp.ling.HasWord), TaggedWord (edu.stanford.nlp.ling.TaggedWord), LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser), CoreLabel (edu.stanford.nlp.ling.CoreLabel), Label (edu.stanford.nlp.ling.Label), StringReader (java.io.StringReader), DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)
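
Since ParserDemo2 is about different ways of handing input to the parser, one more variant is worth sketching: tokenizing in-memory text with DocumentPreprocessor over a StringReader instead of a file name. The sentence text is arbitrary; the constructor and iteration pattern are the same ones ParserDemo2 already uses for files.

import java.io.StringReader;
import java.util.List;

import edu.stanford.nlp.ling.HasWord;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.process.DocumentPreprocessor;
import edu.stanford.nlp.trees.Tree;

public class InMemoryInputSketch {
    public static void main(String[] args) {
        LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        String text = "DocumentPreprocessor can read from any Reader. It also splits the text into sentences.";
        // DocumentPreprocessor tokenizes and sentence-splits whatever the Reader provides
        for (List<HasWord> sentence : new DocumentPreprocessor(new StringReader(text))) {
            Tree parse = lp.parse(sentence);
            parse.pennPrint();
        }
    }
}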

Example 20 with LexicalizedParser

Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

From the class SemanticGraphPrinter, method main:

public static void main(String[] args) {
    Treebank tb = new MemoryTreebank();
    Properties props = StringUtils.argsToProperties(args);
    String treeFileName = props.getProperty("treeFile");
    String sentFileName = props.getProperty("sentFile");
    String testGraph = props.getProperty("testGraph");
    if (testGraph == null) {
        testGraph = "false";
    }
    String load = props.getProperty("load");
    String save = props.getProperty("save");
    if (load != null) {
        log.info("Load not implemented!");
        return;
    }
    if (sentFileName == null && treeFileName == null) {
        log.info("Usage: java SemanticGraph [-sentFile file|-treeFile file] [-testGraph]");
        Tree t = Tree.valueOf("(ROOT (S (NP (NP (DT An) (NN attempt)) (PP (IN on) (NP (NP (NNP Andres) (NNP Pastrana) (POS 's)) (NN life)))) (VP (VBD was) (VP (VBN carried) (PP (IN out) (S (VP (VBG using) (NP (DT a) (JJ powerful) (NN bomb))))))) (. .)))");
        tb.add(t);
    } else if (treeFileName != null) {
        tb.loadPath(treeFileName);
    } else {
        String[] options = { "-retainNPTmpSubcategories" };
        LexicalizedParser lp = LexicalizedParser.loadModel("/u/nlp/data/lexparser/englishPCFG.ser.gz", options);
        BufferedReader reader = null;
        try {
            reader = IOUtils.readerFromString(sentFileName);
        } catch (IOException e) {
            throw new RuntimeIOException("Cannot find or open " + sentFileName, e);
        }
        try {
            System.out.println("Processing sentence file " + sentFileName);
            for (String line; (line = reader.readLine()) != null; ) {
                System.out.println("Processing sentence: " + line);
                PTBTokenizer<Word> ptb = PTBTokenizer.newPTBTokenizer(new StringReader(line));
                List<Word> words = ptb.tokenize();
                Tree parseTree = lp.parseTree(words);
                tb.add(parseTree);
            }
            reader.close();
        } catch (Exception e) {
            throw new RuntimeException("Exception reading key file " + sentFileName, e);
        }
    }
    for (Tree t : tb) {
        SemanticGraph sg = SemanticGraphFactory.generateUncollapsedDependencies(t);
        System.out.println(sg.toString());
        System.out.println(sg.toCompactString());
        if (testGraph.equals("true")) {
            SemanticGraph g1 = SemanticGraphFactory.generateCollapsedDependencies(t);
            System.out.println("TEST SEMANTIC GRAPH - graph ----------------------------");
            System.out.println(g1.toString());
            System.out.println("readable ----------------------------");
            System.out.println(g1.toString(SemanticGraph.OutputFormat.READABLE));
            System.out.println("List of dependencies ----------------------------");
            System.out.println(g1.toList());
            System.out.println("xml ----------------------------");
            System.out.println(g1.toString(SemanticGraph.OutputFormat.XML));
            System.out.println("dot ----------------------------");
            System.out.println(g1.toDotFormat());
            System.out.println("dot (simple) ----------------------------");
            System.out.println(g1.toDotFormat("Simple", CoreLabel.OutputFormat.VALUE));
        // System.out.println(" graph ----------------------------");
        // System.out.println(t.allTypedDependenciesCCProcessed(false));
        }
    }
    if (save != null) {
        log.info("Save not implemented!");
    }
}
Also used: RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException), Treebank (edu.stanford.nlp.trees.Treebank), MemoryTreebank (edu.stanford.nlp.trees.MemoryTreebank), LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser), IOException (java.io.IOException), Properties (java.util.Properties), FileNotFoundException (java.io.FileNotFoundException), PTBTokenizer (edu.stanford.nlp.process.PTBTokenizer), BufferedReader (java.io.BufferedReader), StringReader (java.io.StringReader), Tree (edu.stanford.nlp.trees.Tree), List (java.util.List)
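
As a compact sketch combining the pieces of this example: parse one sentence with a LexicalizedParser and turn the resulting tree into SemanticGraphs with the same SemanticGraphFactory calls used above. It assumes the standard English PCFG model is available on the classpath.

import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.semgraph.SemanticGraph;
import edu.stanford.nlp.semgraph.SemanticGraphFactory;
import edu.stanford.nlp.trees.Tree;

public class SemanticGraphSketch {
    public static void main(String[] args) {
        LexicalizedParser lp = LexicalizedParser.loadModel("edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz");
        Tree tree = lp.parse("An attempt on his life was carried out using a powerful bomb.");
        // Same factory methods as SemanticGraphPrinter: uncollapsed and collapsed dependencies
        SemanticGraph uncollapsed = SemanticGraphFactory.generateUncollapsedDependencies(tree);
        SemanticGraph collapsed = SemanticGraphFactory.generateCollapsedDependencies(tree);
        System.out.println(uncollapsed.toCompactString());
        System.out.println(collapsed.toString(SemanticGraph.OutputFormat.READABLE));
    }
}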

Aggregations

LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser) 20
Tree (edu.stanford.nlp.trees.Tree) 7
Pair (edu.stanford.nlp.util.Pair) 7
Treebank (edu.stanford.nlp.trees.Treebank) 6
FileFilter (java.io.FileFilter) 5
ArrayList (java.util.ArrayList) 5
Map (java.util.Map) 5
StringReader (java.io.StringReader) 4
SimpleMatrix (org.ejml.simple.SimpleMatrix) 4
HasWord (edu.stanford.nlp.ling.HasWord) 3
EvaluateTreebank (edu.stanford.nlp.parser.lexparser.EvaluateTreebank) 3
DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor) 3
BufferedWriter (java.io.BufferedWriter) 3
FileWriter (java.io.FileWriter) 3
Word (edu.stanford.nlp.ling.Word) 2
ParserQuery (edu.stanford.nlp.parser.common.ParserQuery) 2
Options (edu.stanford.nlp.parser.lexparser.Options) 2
RerankingParserQuery (edu.stanford.nlp.parser.lexparser.RerankingParserQuery) 2
DeepTree (edu.stanford.nlp.trees.DeepTree) 2
TreeTransformer (edu.stanford.nlp.trees.TreeTransformer) 2