Search in sources:

Example 11 with LexicalizedParser

use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

The class UpdateParserOptions, method main.

/**
 * Loads a serialized parser model, applies any extra options given on the
 * command line, and re-serializes the updated parser.
 * <br>
 * Arguments: {@code -input} (model file to load), {@code -output} (file to
 * save the updated model to); every other argument is passed through to the
 * parser as an option.
 *
 * @param args command line arguments as described above
 * @throws IllegalArgumentException if a flag is missing its value or a
 *         required flag is absent
 */
public static void main(String[] args) {
    String input = null;
    String output = null;
    List<String> extraArgs = Generics.newArrayList();
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-input")) {
            // Guard against a trailing flag with no value (would otherwise AIOOBE)
            if (argIndex + 1 >= args.length) {
                throw new IllegalArgumentException("-input requires a value");
            }
            input = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-output")) {
            if (argIndex + 1 >= args.length) {
                throw new IllegalArgumentException("-output requires a value");
            }
            output = args[argIndex + 1];
            argIndex += 2;
        } else {
            // Unrecognized arguments are forwarded to the parser unchanged
            extraArgs.add(args[argIndex++]);
        }
    }
    // Fail fast with a clear message instead of passing null to loadModel
    if (input == null) {
        throw new IllegalArgumentException("Need to specify an input model with -input");
    }
    if (output == null) {
        throw new IllegalArgumentException("Need to specify an output file with -output");
    }
    LexicalizedParser parser = LexicalizedParser.loadModel(input, extraArgs);
    parser.saveParserToSerialized(output);
}
Also used : LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser)

Example 12 with LexicalizedParser

use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project lucida by claritylab.

The class StanfordParser, method initialize.

/**
     * Initializes static resources: loads this class's properties, creates the
     * English treebank language pack, and constructs the lexicalized parser
     * from the configured model file. Idempotent: returns immediately if the
     * parser has already been created.
     * 
     * @throws Exception if the required 'modelFile' property is undefined or
     *         the parser cannot be loaded from it
     */
public static void initialize() throws Exception {
    // Already initialized — nothing to do (static fields are shared)
    if (parser != null)
        return;
    // Properties here is info.ephyra.util.Properties, keyed by this class's name
    Properties properties = Properties.loadFromClassName(StanfordParser.class.getName());
    tlp = new PennTreebankLanguagePack();
    String modelFile = properties.getProperty("modelFile");
    if (modelFile == null)
        throw new Exception("Required property '" + "modelFile' is undefined");
    parser = new LexicalizedParser(modelFile);
}
Also used : LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) Properties(info.ephyra.util.Properties) PennTreebankLanguagePack(edu.stanford.nlp.trees.PennTreebankLanguagePack)

Example 13 with LexicalizedParser

use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

The class DependencyIndexITest, method testPositions.

public void testPositions() {
    try {
        // The same bracketing is exercised through three construction paths,
        // then compared against a parser-produced tree.
        final String penn = "(S (NP (NNP Mary)) (VP (VBD had) (NP (DT a) (JJ little) (NN lamb))) (. .))";
        // 1) Tree read from a reader, using StringLabelFactory
        Tree t = new PennTreeReader(new StringReader(penn), new LabeledScoredTreeFactory(new StringLabelFactory())).readTree();
        checkTree(t);
        // 2) Tree created via Tree.valueOf()
        t = Tree.valueOf(penn);
        checkTree(t);
        // 3) Tree read from a reader, using CoreLabelFactory
        t = new PennTreeReader(new StringReader(penn), new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
        checkTree(t);
        // 4) Tree produced by the parser itself
        LexicalizedParser parser = LexicalizedParser.loadModel();
        t = parser.parse("Mary had a little lamb .");
        t.indexLeaves();
        checkTree(t);
    } catch (IOException e) {
        // this should never happen
        fail("IOException shouldn't happen.");
    }
}
Also used : PennTreeReader(edu.stanford.nlp.trees.PennTreeReader) StringLabelFactory(edu.stanford.nlp.ling.StringLabelFactory) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) StringReader(java.io.StringReader) Tree(edu.stanford.nlp.trees.Tree) IOException(java.io.IOException) LabeledScoredTreeFactory(edu.stanford.nlp.trees.LabeledScoredTreeFactory)

Example 14 with LexicalizedParser

use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

The class AverageDVModels, method main.

/**
   * Averages the matrices of several DVModels into a single new model and
   * saves the result wrapped in a DVParser.
   * <br>
   * Command line arguments for this program:
   * <br>
   * -output: the model file to output
   * -input: a list of model files to input (space- or comma-separated)
   */
public static void main(String[] args) {
    String outputModelFilename = null;
    List<String> inputModelFilenames = Generics.newArrayList();
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-output")) {
            // Guard against a trailing flag with no value (would otherwise AIOOBE)
            if (argIndex + 1 >= args.length) {
                throw new RuntimeException("-output requires a value");
            }
            outputModelFilename = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-input")) {
            // Consume every following token up to the next flag; each token may
            // itself be a comma-separated list of filenames
            for (++argIndex; argIndex < args.length && !args[argIndex].startsWith("-"); ++argIndex) {
                inputModelFilenames.addAll(Arrays.asList(args[argIndex].split(",")));
            }
        } else {
            throw new RuntimeException("Unknown argument " + args[argIndex]);
        }
    }
    if (outputModelFilename == null) {
        log.info("Need to specify output model name with -output");
        System.exit(2);
    }
    if (inputModelFilenames.isEmpty()) {
        log.info("Need to specify input model names with -input");
        System.exit(2);
    }
    log.info("Averaging " + inputModelFilenames);
    log.info("Outputting result to " + outputModelFilename);
    // Keep the first parser loaded: its Options object parameterizes the new model
    LexicalizedParser lexparser = null;
    List<DVModel> models = Generics.newArrayList();
    for (String filename : inputModelFilenames) {
        LexicalizedParser parser = LexicalizedParser.loadModel(filename);
        if (lexparser == null) {
            lexparser = parser;
        }
        models.add(DVParser.getModelFromLexicalizedParser(parser));
    }
    // Collect the parallel parameter maps from each model, then average each group
    List<TwoDimensionalMap<String, String, SimpleMatrix>> binaryTransformMaps = CollectionUtils.transformAsList(models, model -> model.binaryTransform);
    List<TwoDimensionalMap<String, String, SimpleMatrix>> binaryScoreMaps = CollectionUtils.transformAsList(models, model -> model.binaryScore);
    List<Map<String, SimpleMatrix>> unaryTransformMaps = CollectionUtils.transformAsList(models, model -> model.unaryTransform);
    List<Map<String, SimpleMatrix>> unaryScoreMaps = CollectionUtils.transformAsList(models, model -> model.unaryScore);
    List<Map<String, SimpleMatrix>> wordMaps = CollectionUtils.transformAsList(models, model -> model.wordVectors);
    TwoDimensionalMap<String, String, SimpleMatrix> binaryTransformAverages = averageBinaryMatrices(binaryTransformMaps);
    TwoDimensionalMap<String, String, SimpleMatrix> binaryScoreAverages = averageBinaryMatrices(binaryScoreMaps);
    Map<String, SimpleMatrix> unaryTransformAverages = averageUnaryMatrices(unaryTransformMaps);
    Map<String, SimpleMatrix> unaryScoreAverages = averageUnaryMatrices(unaryScoreMaps);
    Map<String, SimpleMatrix> wordAverages = averageUnaryMatrices(wordMaps);
    DVModel newModel = new DVModel(binaryTransformAverages, unaryTransformAverages, binaryScoreAverages, unaryScoreAverages, wordAverages, lexparser.getOp());
    DVParser newParser = new DVParser(newModel, lexparser);
    newParser.saveModel(outputModelFilename);
}
Also used : LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) SimpleMatrix(org.ejml.simple.SimpleMatrix) TwoDimensionalMap(edu.stanford.nlp.util.TwoDimensionalMap) Map(java.util.Map) TwoDimensionalMap(edu.stanford.nlp.util.TwoDimensionalMap)

Example 15 with LexicalizedParser

use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in project CoreNLP by stanfordnlp.

The class CacheParseHypotheses, method main.

/**
   * Reads trees from one or more treebanks, parses each with the given model,
   * and caches the top {@code dvKBest} hypothesis trees per sentence
   * (compressed to byte arrays) into a serialized output file, optionally
   * across multiple threads.
   * <br>
   * An example of a command line is
   * <br>
   * java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz  -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202
   * <br>
   * java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6
   * <br>
   * java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed  026-270,301-499,600-999
   *
   * @param args command line arguments: -model/-parser, -output, -treebank
   *             (repeatable), -dvKBest, -numThreads
   * @throws IOException if the cache cannot be written
   * @throws IllegalArgumentException on unknown or missing required arguments
   */
public static void main(String[] args) throws IOException {
    String parserModel = null;
    String output = null;
    List<Pair<String, FileFilter>> treebanks = Generics.newArrayList();
    // Number of hypothesis parses to cache per sentence
    int dvKBest = 200;
    int numThreads = 1;
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-dvKBest")) {
            dvKBest = Integer.valueOf(args[argIndex + 1]);
            argIndex += 2;
            continue;
        }
        if (args[argIndex].equalsIgnoreCase("-parser") || args[argIndex].equals("-model")) {
            parserModel = args[argIndex + 1];
            argIndex += 2;
            continue;
        }
        if (args[argIndex].equalsIgnoreCase("-output")) {
            output = args[argIndex + 1];
            argIndex += 2;
            continue;
        }
        if (args[argIndex].equalsIgnoreCase("-treebank")) {
            // A treebank description may span several sub-arguments (path + ranges)
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-treebank");
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            treebanks.add(treebankDescription);
            continue;
        }
        if (args[argIndex].equalsIgnoreCase("-numThreads")) {
            numThreads = Integer.valueOf(args[argIndex + 1]);
            argIndex += 2;
            continue;
        }
        throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
    }
    if (parserModel == null) {
        throw new IllegalArgumentException("Need to supply a parser model with -model");
    }
    if (output == null) {
        throw new IllegalArgumentException("Need to supply an output filename with -output");
    }
    if (treebanks.size() == 0) {
        throw new IllegalArgumentException("Need to supply a treebank with -treebank");
    }
    log.info("Writing output to " + output);
    log.info("Loading parser model " + parserModel);
    log.info("Writing " + dvKBest + " hypothesis trees for each tree");
    // Pass -dvKBest through so the parser itself produces k-best lists
    LexicalizedParser parser = LexicalizedParser.loadModel(parserModel, "-dvKBest", Integer.toString(dvKBest));
    CacheParseHypotheses cacher = new CacheParseHypotheses(parser);
    TreeTransformer transformer = DVParser.buildTrainTransformer(parser.getOp());
    List<Tree> sentences = new ArrayList<>();
    for (Pair<String, FileFilter> description : treebanks) {
        log.info("Reading trees from " + description.first);
        Treebank treebank = parser.getOp().tlpParams.memoryTreebank();
        treebank.loadPath(description.first, description.second);
        // Apply the training transformer up front so workers see normalized trees
        treebank = treebank.transform(transformer);
        sentences.addAll(treebank);
    }
    log.info("Processing " + sentences.size() + " trees");
    List<Pair<Tree, byte[]>> cache = Generics.newArrayList();
    // Wrap for thread safety: the same transformer is shared by all workers
    transformer = new SynchronizedTreeTransformer(transformer);
    MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper = new MulticoreWrapper<>(numThreads, new CacheProcessor(cacher, parser, dvKBest, transformer));
    for (Tree tree : sentences) {
        wrapper.put(tree);
        // Drain any finished results as we go to bound memory use
        while (wrapper.peek()) {
            cache.add(wrapper.poll());
            if (cache.size() % 10 == 0) {
                System.out.println("Processed " + cache.size() + " trees");
            }
        }
    }
    // Wait for all workers, then drain the remaining results
    wrapper.join();
    while (wrapper.peek()) {
        cache.add(wrapper.poll());
        if (cache.size() % 10 == 0) {
            System.out.println("Processed " + cache.size() + " trees");
        }
    }
    System.out.println("Finished processing " + cache.size() + " trees");
    IOUtils.writeObjectToFile(cache, output);
}
Also used : MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) Treebank(edu.stanford.nlp.trees.Treebank) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) TreeTransformer(edu.stanford.nlp.trees.TreeTransformer) SynchronizedTreeTransformer(edu.stanford.nlp.trees.SynchronizedTreeTransformer) BasicCategoryTreeTransformer(edu.stanford.nlp.trees.BasicCategoryTreeTransformer) Pair(edu.stanford.nlp.util.Pair) SynchronizedTreeTransformer(edu.stanford.nlp.trees.SynchronizedTreeTransformer)

Aggregations

LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)20 Tree (edu.stanford.nlp.trees.Tree)7 Pair (edu.stanford.nlp.util.Pair)7 Treebank (edu.stanford.nlp.trees.Treebank)6 FileFilter (java.io.FileFilter)5 ArrayList (java.util.ArrayList)5 Map (java.util.Map)5 StringReader (java.io.StringReader)4 SimpleMatrix (org.ejml.simple.SimpleMatrix)4 HasWord (edu.stanford.nlp.ling.HasWord)3 EvaluateTreebank (edu.stanford.nlp.parser.lexparser.EvaluateTreebank)3 DocumentPreprocessor (edu.stanford.nlp.process.DocumentPreprocessor)3 BufferedWriter (java.io.BufferedWriter)3 FileWriter (java.io.FileWriter)3 Word (edu.stanford.nlp.ling.Word)2 ParserQuery (edu.stanford.nlp.parser.common.ParserQuery)2 Options (edu.stanford.nlp.parser.lexparser.Options)2 RerankingParserQuery (edu.stanford.nlp.parser.lexparser.RerankingParserQuery)2 DeepTree (edu.stanford.nlp.trees.DeepTree)2 TreeTransformer (edu.stanford.nlp.trees.TreeTransformer)2