Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in the CoreNLP project by stanfordnlp:
class UpdateParserOptions, method main.
/**
 * Reads a serialized parser model, re-applies any extra command-line options,
 * and saves the resulting model.
 * <br>
 * Required arguments:
 * <ul>
 * <li>{@code -input}: the model file to load</li>
 * <li>{@code -output}: where to write the updated model</li>
 * </ul>
 * All other arguments are forwarded to {@link LexicalizedParser#loadModel}.
 */
public static void main(String[] args) {
  String input = null;
  String output = null;
  List<String> extraArgs = Generics.newArrayList();
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-input")) {
      // Guard against a trailing flag with no value (would otherwise throw AIOOBE)
      if (argIndex + 1 >= args.length) {
        throw new IllegalArgumentException("-input requires a filename argument");
      }
      input = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-output")) {
      if (argIndex + 1 >= args.length) {
        throw new IllegalArgumentException("-output requires a filename argument");
      }
      output = args[argIndex + 1];
      argIndex += 2;
    } else {
      // Anything unrecognized is passed through to the model loader
      extraArgs.add(args[argIndex++]);
    }
  }
  // Fail fast with a clear message instead of an obscure downstream error,
  // consistent with the validation style used by the other tools in this file
  if (input == null) {
    throw new IllegalArgumentException("Must specify an input model with -input");
  }
  if (output == null) {
    throw new IllegalArgumentException("Must specify an output file with -output");
  }
  LexicalizedParser parser = LexicalizedParser.loadModel(input, extraArgs);
  parser.saveParserToSerialized(output);
}
Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in the lucida project by claritylab:
class StanfordParser, method initialize.
/**
 * Lazily initializes the shared static resources (treebank language pack and
 * lexicalized parser). Subsequent calls are no-ops once the parser is set.
 *
 * @throws Exception if the required 'modelFile' property is not configured
 */
public static void initialize() throws Exception {
  if (parser == null) {
    Properties props = Properties.loadFromClassName(StanfordParser.class.getName());
    tlp = new PennTreebankLanguagePack();
    String model = props.getProperty("modelFile");
    if (model == null) {
      throw new Exception("Required property 'modelFile' is undefined");
    }
    parser = new LexicalizedParser(model);
  }
}
Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in the CoreNLP project by stanfordnlp:
class DependencyIndexITest, method testPositions.
public void testPositions() {
  // The same sentence is parsed/loaded four different ways; each result
  // must pass the position checks in checkTree.
  final String bracketing =
      "(S (NP (NNP Mary)) (VP (VBD had) (NP (DT a) (JJ little) (NN lamb))) (. .))";
  try {
    // Case 1: tree read from a reader, using StringLabelFactory
    Tree tree = new PennTreeReader(
        new StringReader(bracketing),
        new LabeledScoredTreeFactory(new StringLabelFactory())).readTree();
    checkTree(tree);
    // Case 2: tree created via Tree.valueOf()
    tree = Tree.valueOf(bracketing);
    checkTree(tree);
    // Case 3: tree read from a reader, using CoreLabelFactory
    tree = new PennTreeReader(
        new StringReader(bracketing),
        new LabeledScoredTreeFactory(CoreLabel.factory())).readTree();
    checkTree(tree);
    // Case 4: tree produced by running the parser itself
    LexicalizedParser parser = LexicalizedParser.loadModel();
    tree = parser.parse("Mary had a little lamb .");
    tree.indexLeaves();
    checkTree(tree);
  } catch (IOException e) {
    // this should never happen
    fail("IOException shouldn't happen.");
  }
}
Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in the CoreNLP project by stanfordnlp:
class AverageDVModels, method main.
/**
 * Averages the DVModel matrices of several trained models into a single model.
 * <br>
 * Command line arguments for this program:
 * <br>
 * -output: the model file to output
 * -input: a list of model files to input (space- or comma-separated)
 */
public static void main(String[] args) {
  String outputModelFilename = null;
  List<String> inputModelFilenames = Generics.newArrayList();
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-output")) {
      // Guard against "-output" as the last token (would otherwise throw AIOOBE)
      if (argIndex + 1 >= args.length) {
        throw new RuntimeException("-output requires a filename argument");
      }
      outputModelFilename = args[argIndex + 1];
      argIndex += 2;
    } else if (args[argIndex].equalsIgnoreCase("-input")) {
      // Consume every following token up to the next flag; each token may
      // itself be a comma-separated list of filenames
      for (++argIndex; argIndex < args.length && !args[argIndex].startsWith("-"); ++argIndex) {
        inputModelFilenames.addAll(Arrays.asList(args[argIndex].split(",")));
      }
    } else {
      throw new RuntimeException("Unknown argument " + args[argIndex]);
    }
  }
  if (outputModelFilename == null) {
    log.info("Need to specify output model name with -output");
    System.exit(2);
  }
  if (inputModelFilenames.isEmpty()) {
    log.info("Need to specify input model names with -input");
    System.exit(2);
  }
  log.info("Averaging " + inputModelFilenames);
  log.info("Outputting result to " + outputModelFilename);
  // The first loaded parser supplies the options for the averaged model
  LexicalizedParser lexparser = null;
  List<DVModel> models = Generics.newArrayList();
  for (String filename : inputModelFilenames) {
    LexicalizedParser parser = LexicalizedParser.loadModel(filename);
    if (lexparser == null) {
      lexparser = parser;
    }
    models.add(DVParser.getModelFromLexicalizedParser(parser));
  }
  // Collect each parameter family across all models, then average element-wise
  List<TwoDimensionalMap<String, String, SimpleMatrix>> binaryTransformMaps = CollectionUtils.transformAsList(models, model -> model.binaryTransform);
  List<TwoDimensionalMap<String, String, SimpleMatrix>> binaryScoreMaps = CollectionUtils.transformAsList(models, model -> model.binaryScore);
  List<Map<String, SimpleMatrix>> unaryTransformMaps = CollectionUtils.transformAsList(models, model -> model.unaryTransform);
  List<Map<String, SimpleMatrix>> unaryScoreMaps = CollectionUtils.transformAsList(models, model -> model.unaryScore);
  List<Map<String, SimpleMatrix>> wordMaps = CollectionUtils.transformAsList(models, model -> model.wordVectors);
  TwoDimensionalMap<String, String, SimpleMatrix> binaryTransformAverages = averageBinaryMatrices(binaryTransformMaps);
  TwoDimensionalMap<String, String, SimpleMatrix> binaryScoreAverages = averageBinaryMatrices(binaryScoreMaps);
  Map<String, SimpleMatrix> unaryTransformAverages = averageUnaryMatrices(unaryTransformMaps);
  Map<String, SimpleMatrix> unaryScoreAverages = averageUnaryMatrices(unaryScoreMaps);
  Map<String, SimpleMatrix> wordAverages = averageUnaryMatrices(wordMaps);
  DVModel newModel = new DVModel(binaryTransformAverages, unaryTransformAverages, binaryScoreAverages, unaryScoreAverages, wordAverages, lexparser.getOp());
  DVParser newParser = new DVParser(newModel, lexparser);
  newParser.saveModel(outputModelFilename);
}
Use of edu.stanford.nlp.parser.lexparser.LexicalizedParser in the CoreNLP project by stanfordnlp:
class CacheParseHypotheses, method main.
/**
 * Parses a treebank with a loaded model and caches the top-K hypothesis trees
 * for each sentence, compressed, to a serialized output file.
 * <br>
 * An example of a command line is
 * <br>
 * java -mx1g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model /scr/horatio/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached9.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-202
 * <br>
 * java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/wsjPCFG.nocompact.simple.ser.gz -output cached.train.simple.ser.gz -treebank /afs/ir/data/linguistic-data/Treebank/3/parsed/mrg/wsj 200-2199 -numThreads 6
 * <br>
 * java -mx4g edu.stanford.nlp.parser.dvparser.CacheParseHypotheses -model ~/scr/dvparser/chinese/xinhuaPCFG.ser.gz -output cached.xinhua.train.ser.gz -treebank /afs/ir/data/linguistic-data/Chinese-Treebank/6/data/utf8/bracketed 026-270,301-499,600-999
 */
public static void main(String[] args) throws IOException {
  String parserModel = null;
  String output = null;
  List<Pair<String, FileFilter>> treebanks = Generics.newArrayList();
  int dvKBest = 200;
  int numThreads = 1;
  for (int argIndex = 0; argIndex < args.length; ) {
    if (args[argIndex].equalsIgnoreCase("-dvKBest")) {
      // parseInt avoids the pointless Integer boxing of Integer.valueOf
      dvKBest = Integer.parseInt(args[argIndex + 1]);
      argIndex += 2;
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-parser") || args[argIndex].equals("-model")) {
      parserModel = args[argIndex + 1];
      argIndex += 2;
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-output")) {
      output = args[argIndex + 1];
      argIndex += 2;
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-treebank")) {
      // A treebank description may carry sub-arguments (path plus file ranges)
      Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-treebank");
      argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
      treebanks.add(treebankDescription);
      continue;
    }
    if (args[argIndex].equalsIgnoreCase("-numThreads")) {
      numThreads = Integer.parseInt(args[argIndex + 1]);
      argIndex += 2;
      continue;
    }
    throw new IllegalArgumentException("Unknown argument " + args[argIndex]);
  }
  if (parserModel == null) {
    throw new IllegalArgumentException("Need to supply a parser model with -model");
  }
  if (output == null) {
    throw new IllegalArgumentException("Need to supply an output filename with -output");
  }
  if (treebanks.isEmpty()) {
    throw new IllegalArgumentException("Need to supply a treebank with -treebank");
  }
  log.info("Writing output to " + output);
  log.info("Loading parser model " + parserModel);
  log.info("Writing " + dvKBest + " hypothesis trees for each tree");
  LexicalizedParser parser = LexicalizedParser.loadModel(parserModel, "-dvKBest", Integer.toString(dvKBest));
  CacheParseHypotheses cacher = new CacheParseHypotheses(parser);
  TreeTransformer transformer = DVParser.buildTrainTransformer(parser.getOp());
  List<Tree> sentences = new ArrayList<>();
  for (Pair<String, FileFilter> description : treebanks) {
    log.info("Reading trees from " + description.first);
    Treebank treebank = parser.getOp().tlpParams.memoryTreebank();
    treebank.loadPath(description.first, description.second);
    treebank = treebank.transform(transformer);
    sentences.addAll(treebank);
  }
  log.info("Processing " + sentences.size() + " trees");
  List<Pair<Tree, byte[]>> cache = Generics.newArrayList();
  // The transformer is shared by all worker threads, so it must be synchronized
  transformer = new SynchronizedTreeTransformer(transformer);
  MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper = new MulticoreWrapper<>(numThreads, new CacheProcessor(cacher, parser, dvKBest, transformer));
  for (Tree tree : sentences) {
    wrapper.put(tree);
    drainResults(wrapper, cache);
  }
  wrapper.join();
  drainResults(wrapper, cache);
  System.out.println("Finished processing " + cache.size() + " trees");
  IOUtils.writeObjectToFile(cache, output);
}

/** Moves every currently available result from the wrapper into the cache, logging progress every 10 trees. */
private static void drainResults(MulticoreWrapper<Tree, Pair<Tree, byte[]>> wrapper, List<Pair<Tree, byte[]>> cache) {
  while (wrapper.peek()) {
    cache.add(wrapper.poll());
    if (cache.size() % 10 == 0) {
      System.out.println("Processed " + cache.size() + " trees");
    }
  }
}
Aggregations