Search in sources :

Example 1 with ParserQuery

use of edu.stanford.nlp.parser.common.ParserQuery in project CoreNLP by stanfordnlp.

the class ParserAnnotator method doOneSentence.

/**
 * Collects parse trees for one sentence using a fresh {@code ParserQuery}
 * obtained from {@code parser}, logging a warning instead of failing when
 * no parse is available.
 *
 * NOTE(review): this listing appears to be truncated by extraction. Closing
 * braces are missing, no parse call on {@code pq} is visible, and nothing is
 * visibly added to {@code trees} before it is returned. Confirm against the
 * original ParserAnnotator source before relying on it.
 *
 * @param constraints parser constraints for the sentence (not referenced in
 *                    the visible lines)
 * @param words       the tokens of the sentence; in the visible lines they
 *                    are only used to build log messages
 * @return the list {@code trees}
 */
private List<Tree> doOneSentence(List<ParserConstraint> constraints, List<CoreLabel> words) {
    // A new query object is requested from the parser for every sentence.
    ParserQuery pq = parser.parserQuery();
    List<Tree> trees = Generics.newLinkedList();
    try {
        // Use bestParse if kBest is set to 1.
        if (this.kBest == 1) {
            Tree t = pq.getBestParse();
            if (t == null) {
                log.warn("Parsing of sentence failed.  " + "Will ignore and continue: " + SentenceUtils.listToString(words));
            } else {
                double score = pq.getBestScore();
                // Java's % keeps the sign of the dividend, so this stores the
                // remainder of the score after removing whole multiples of 10000.
                // Presumably this strips the "-10000 per unknown word" component
                // mentioned in the k-best branch below -- confirm.
                t.setScore(score % -10000.0);
        } else {
            // k-best path: ask for up to kBest scored parses.
            List<ScoredObject<Tree>> scoredObjects = pq.getKBestParses(this.kBest);
            if (scoredObjects == null || scoredObjects.size() < 1) {
                log.warn("Parsing of sentence failed.  " + "Will ignore and continue: " + SentenceUtils.listToString(words));
            } else {
                for (ScoredObject<Tree> so : scoredObjects) {
                    // -10000 denotes unknown words
                    Tree tree = so.object();
                    // Same remainder normalization as in the single-best branch.
                    tree.setScore(so.score() % -10000.0);
    } catch (OutOfMemoryError e) {
        // Beware that we can now get an OOM in logging, too.
        // Only the sentence length is logged here, not the sentence text.
        log.warn("Parsing of sentence ran out of memory (length=" + words.size() + ").  " + "Will ignore and try to continue.");
    } catch (NoSuchParseException e) {
        log.warn("Parsing of sentence failed, possibly because of out of memory.  " + "Will ignore and continue: " + SentenceUtils.listToString(words));
    return trees;
Also used : NoSuchParseException(edu.stanford.nlp.parser.common.NoSuchParseException) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery)

Example 2 with ParserQuery

use of edu.stanford.nlp.parser.common.ParserQuery in project CoreNLP by stanfordnlp.

the class LexicalizedParserITest method testParserQuery.

/**
 * Test the query structure that you can use for better control of
 * the parse.
 */
public void testParserQuery() {
    // Build the input sentence with the sampleSausage() helper.
    List<CoreLabel> sentence = sampleSausage();
    // NOTE(review): `sentence` is never handed to `pq` in the visible lines, and
    // the method's closing brace is missing; a parse call appears to have been
    // lost from this listing -- confirm against the original test.
    ParserQuery pq = englishParser.parserQuery();
    // Check the best parse from the query against the expected bracketed tree string.
    compareSingleOutput(pq.getBestParse(), false, pennPrint, "(ROOT (S (NP (PRP$ My) (NN dog)) (ADVP (RB also)) (VP (VBZ likes) (S (VP (VBG eating) (NP (NN sausage))))) (. .)))");
Also used : CoreLabel(edu.stanford.nlp.ling.CoreLabel) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) Test(org.junit.Test)

Example 3 with ParserQuery

use of edu.stanford.nlp.parser.common.ParserQuery in project CoreNLP by stanfordnlp.

the class ParseFiles method parseFiles.

/**
 * Parses each input named in {@code args} from index {@code argIndex} onward,
 * prints the results, and finally reports throughput and failure statistics
 * to {@code pwErr}.
 *
 * NOTE(review): this listing appears to be damaged by extraction. Closing
 * braces are missing, some statements are gone (for example the bodies of
 * {@code if (op.testOptions.writeOutputFiles)} near the end of the file loop
 * and of {@code if (saidMemMessage)}), and the {@code FileOutputStream(fname)}
 * line has lost the start of its expression. Verify against the original
 * ParseFiles source before relying on it.
 *
 * @param args              argument array; entries from {@code argIndex} on are
 *                          treated as input names, with {@code "-"} meaning
 *                          standard input
 * @param argIndex          index of the first input name in {@code args}
 * @param tokenized         consulted only when {@code tokenizerFactory} is null:
 *                          if true the preprocessor's tokenizer factory is set
 *                          to null, otherwise to {@code tlp.getTokenizerFactory()}
 * @param tokenizerFactory  tokenizer factory; only its null-ness and its string
 *                          form are used in the visible lines
 * @param elementDelimiter  when non-null the document type is XML, otherwise Plain
 * @param sentenceDelimiter not referenced in the visible lines
 * @param escaper           not referenced in the visible lines
 * @param tagDelimiter      not referenced in the visible lines
 */
public void parseFiles(String[] args, int argIndex, boolean tokenized, TokenizerFactory<? extends HasWord> tokenizerFactory, String elementDelimiter, String sentenceDelimiter, Function<List<HasWord>, List<HasWord>> escaper, String tagDelimiter) {
    // The presence of an element delimiter is what selects XML input handling.
    final DocType docType = (elementDelimiter == null) ? DocType.Plain : DocType.XML;
    if (op.testOptions.verbose) {
        if (tokenizerFactory != null)
            pwErr.println("parseFiles: Tokenizer factory is: " + tokenizerFactory);
    // Timer is read after the file loop to compute the words/sec and sents/sec figures.
    final Timing timer = new Timing();
    // Loop over the files
    for (int i = argIndex; i < args.length; i++) {
        final String filename = args[i];
        final DocumentPreprocessor documentPreprocessor;
        // "-" means: read the document from standard input using the configured input encoding.
        if (filename.equals("-")) {
            try {
                documentPreprocessor = new DocumentPreprocessor(IOUtils.readerFromStdin(op.tlpParams.getInputEncoding()), docType);
            } catch (IOException e) {
                // Rethrown as an unchecked exception with the original as its cause.
                throw new RuntimeIOException(e);
        } else {
            documentPreprocessor = new DocumentPreprocessor(filename, docType, op.tlpParams.getInputEncoding());
        // Unused values are null per the main() method invocation below
        // null is the default for these properties
        if (tokenizerFactory == null)
            documentPreprocessor.setTokenizerFactory((tokenized) ? null : tlp.getTokenizerFactory());
        // Setup the output
        // Defaults to the shared pwOut; reassigned below when per-file output is enabled.
        PrintWriter pwo = pwOut;
        if (op.testOptions.writeOutputFiles) {
            String normalizedName = filename;
            try {
                // this will exception if not a URL
                new URL(normalizedName);
                // For URLs, every "/" becomes "_" so the name can serve as a single file name.
                normalizedName = normalizedName.replaceAll("/", "_");
            } catch (MalformedURLException e) {
            // It isn't a URL, so silently ignore
            // Output extension falls back to "stp" when none is configured.
            String ext = (op.testOptions.outputFilesExtension == null) ? "stp" : op.testOptions.outputFilesExtension;
            String fname = normalizedName + '.' + ext;
            if (op.testOptions.outputFilesDirectory != null && !op.testOptions.outputFilesDirectory.isEmpty()) {
                String fseparator = System.getProperty("file.separator");
                if (fseparator == null || fseparator.isEmpty()) {
                    fseparator = "/";
                // Only the last path component of fname is kept when redirecting into the output directory.
                File fnameFile = new File(fname);
                fname = op.testOptions.outputFilesDirectory + fseparator + fnameFile.getName();
            try {
                // NOTE(review): the right-hand side is garbled (unbalanced parenthesis, no `new`);
                // the wrapper that turns the stream into a PrintWriter appears to have been stripped.
                pwo = FileOutputStream(fname));
            } catch (IOException ioe) {
                throw new RuntimeIOException(ioe);
        treePrint.printHeader(pwo, op.tlpParams.getOutputEncoding());
        pwErr.println("Parsing file: " + filename);
        // NOTE(review): `num` is printed in the messages below but never incremented in the visible lines.
        int num = 0;
        int numProcessed = 0;
        // Any thread count other than 1 takes the MulticoreWrapper path.
        if (op.testOptions.testingThreads != 1) {
            MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
            for (List<HasWord> sentence : documentPreprocessor) {
                int len = sentence.size();
                numWords += len;
                pwErr.println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.listToString(sentence, true));
                // NOTE(review): no call that submits `sentence` to the wrapper is visible here.
                // Drain whatever results are already available while still feeding input.
                while (wrapper.peek()) {
                    ParserQuery pq = wrapper.poll();
                    processResults(pq, numProcessed++, pwo);
            // After the input loop: drain the remaining results.
            while (wrapper.peek()) {
                ParserQuery pq = wrapper.poll();
                processResults(pq, numProcessed++, pwo);
        } else {
            // Single-threaded path: one ParserQuery is reused for every sentence of the file.
            ParserQuery pq = pqFactory.parserQuery();
            for (List<HasWord> sentence : documentPreprocessor) {
                int len = sentence.size();
                numWords += len;
                pwErr.println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.listToString(sentence, true));
                pq.parseAndReport(sentence, pwErr);
                processResults(pq, numProcessed++, pwo);
        // NOTE(review): the statement guarded by this `if` is missing from the listing.
        if (op.testOptions.writeOutputFiles)
        pwErr.println("Parsed file: " + filename + " [" + num + " sentences].");
    long millis = timer.stop();
    // Optional summary: display each of the three evaluators that is non-null.
    if (summary) {
        if (pcfgLL != null)
            pcfgLL.display(false, pwErr);
        if (depLL != null)
            depLL.display(false, pwErr);
        if (factLL != null)
            factLL.display(false, pwErr);
    // NOTE(review): the body of this `if` is missing from the listing.
    if (saidMemMessage) {
    // Throughput: elapsed milliseconds are converted to seconds via floating-point division.
    double wordspersec = numWords / (((double) millis) / 1000);
    double sentspersec = numSents / (((double) millis) / 1000);
    // easier way!
    NumberFormat nf = new DecimalFormat("0.00");
    pwErr.println("Parsed " + numWords + " words in " + numSents + " sentences (" + nf.format(wordspersec) + " wds/sec; " + nf.format(sentspersec) + " sents/sec).");
    if (numFallback > 0) {
        pwErr.println("  " + numFallback + " sentences were parsed by fallback to PCFG.");
    // Failure breakdown is printed only when at least one of the three counters is non-zero.
    if (numUnparsable > 0 || numNoMemory > 0 || numSkipped > 0) {
        pwErr.println("  " + (numUnparsable + numNoMemory + numSkipped) + " sentences were not parsed:");
        if (numUnparsable > 0) {
            pwErr.println("    " + numUnparsable + " were not parsable with non-zero probability.");
        if (numNoMemory > 0) {
            pwErr.println("    " + numNoMemory + " were skipped because of insufficient memory.");
        if (numSkipped > 0) {
            pwErr.println("    " + numSkipped + " were skipped as length 0 or greater than " + op.testOptions.maxLength);
Also used : HasWord(edu.stanford.nlp.ling.HasWord) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) MalformedURLException(java.net.MalformedURLException) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) DecimalFormat(java.text.DecimalFormat) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException) URL(java.net.URL) ParsingThreadsafeProcessor(edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor) FileOutputStream(java.io.FileOutputStream) List(java.util.List) Timing(edu.stanford.nlp.util.Timing) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor) File(java.io.File) DocType(edu.stanford.nlp.process.DocumentPreprocessor.DocType) PrintWriter(java.io.PrintWriter) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) NumberFormat(java.text.NumberFormat)

Example 4 with ParserQuery

use of edu.stanford.nlp.parser.common.ParserQuery in project CoreNLP by stanfordnlp.

the class DVParser method getTopParsesForOneTree.

/**
 * Reparses the words of {@code tree} with {@code parser} and requests the
 * {@code dvKBest} best PCFG parses, passing each result through
 * {@code transformer} when one is supplied.
 *
 * NOTE(review): this listing appears to be damaged by extraction. Closing
 * braces are missing, the call that should wrap the "Failed to use the given
 * parser" message has been stripped, and nothing is visibly added to
 * {@code parses}. Confirm against the original DVParser source.
 *
 * @param parser      parser used to create the {@code ParserQuery}
 * @param dvKBest     number of PCFG parses requested from the query
 * @param tree        tree whose yield supplies the words to reparse
 * @param transformer optional tree transformer; skipped when null
 * @return {@code null} when the yield has at most one word or the parse
 *         fails; otherwise the list {@code parses}
 */
public static List<Tree> getTopParsesForOneTree(LexicalizedParser parser, int dvKBest, Tree tree, TreeTransformer transformer) {
    ParserQuery pq = parser.parserQuery();
    List<Word> sentence = tree.yieldWords();
    // sentence symbol
    // A yield of zero or one word is rejected before the last element is dropped below.
    if (sentence.size() <= 1) {
        return null;
    // Drop the final element of the yield before parsing (presumably the
    // sentence symbol referred to above -- confirm).
    sentence = sentence.subList(0, sentence.size() - 1);
    // NOTE(review): the bare string expression after `{` looks like a logging call whose prefix was stripped.
    if (!pq.parse(sentence)) {"Failed to use the given parser to reparse sentence \"" + sentence + "\"");
        return null;
    List<Tree> parses = new ArrayList<>();
    List<ScoredObject<Tree>> bestKParses = pq.getKBestPCFGParses(dvKBest);
    for (ScoredObject<Tree> so : bestKParses) {
        Tree result = so.object();
        if (transformer != null) {
            result = transformer.transformTree(result);
    return parses;
Also used : Word(edu.stanford.nlp.ling.Word) ScoredObject(edu.stanford.nlp.util.ScoredObject) ArrayList(java.util.ArrayList) Tree(edu.stanford.nlp.trees.Tree) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery)

Example 5 with ParserQuery

use of edu.stanford.nlp.parser.common.ParserQuery in project CoreNLP by stanfordnlp.

the class FindNearestNeighbors method main.

/**
 * Command-line entry point. Loads a parser model, parses every tree of a test
 * treebank, writes each sentence's tokens, parsed tree and root vector to the
 * output file, and then compares node vectors between subtrees and between
 * whole parse records using the Frobenius norm of their difference.
 *
 * Flags recognized in the visible lines (matched case-insensitively):
 * {@code -model}, {@code -testTreebank} and {@code -output}; each of the three
 * is required.
 *
 * NOTE(review): this listing appears to be damaged by extraction. Closing
 * braces are missing, several bare string expressions look like logging calls
 * whose prefix was stripped, some loop bodies are empty, and the listing ends
 * before the writers are flushed or closed. {@code maxLength},
 * {@code numNeighbors} and {@code dvparser} are used but not declared in the
 * visible lines. Confirm against the original FindNearestNeighbors source.
 *
 * @param args command-line arguments
 * @throws Exception declared broadly; the visible code itself throws
 *         {@code IllegalArgumentException} for a missing flag or an unexpected
 *         parser/reranker type, and {@code AssertionError} when a sentence
 *         cannot be parsed or no ROOT vector is found
 */
public static void main(String[] args) throws Exception {
    String modelPath = null;
    String outputPath = null;
    String testTreebankPath = null;
    FileFilter testTreebankFilter = null;
    List<String> unusedArgs = new ArrayList<>();
    // argIndex is advanced inside each branch, which is why the for header has no update clause.
    for (int argIndex = 0; argIndex < args.length; ) {
        if (args[argIndex].equalsIgnoreCase("-model")) {
            modelPath = args[argIndex + 1];
            argIndex += 2;
        } else if (args[argIndex].equalsIgnoreCase("-testTreebank")) {
            Pair<String, FileFilter> treebankDescription = ArgUtils.getTreebankDescription(args, argIndex, "-testTreebank");
            // Skip the flag itself plus however many sub-arguments ArgUtils reports for it.
            argIndex = argIndex + ArgUtils.numSubArgs(args, argIndex) + 1;
            testTreebankPath = treebankDescription.first();
            testTreebankFilter = treebankDescription.second();
        } else if (args[argIndex].equalsIgnoreCase("-output")) {
            outputPath = args[argIndex + 1];
            argIndex += 2;
        } else {
    // NOTE(review): the body of the final `else` is missing; nothing visibly fills unusedArgs
    // or advances argIndex for an unrecognized argument.
    if (modelPath == null) {
        throw new IllegalArgumentException("Need to specify -model");
    if (testTreebankPath == null) {
        throw new IllegalArgumentException("Need to specify -testTreebank");
    if (outputPath == null) {
        throw new IllegalArgumentException("Need to specify -output");
    // unusedArgs is forwarded to the model loader as extra arguments.
    String[] newArgs = unusedArgs.toArray(new String[unusedArgs.size()]);
    LexicalizedParser lexparser = LexicalizedParser.loadModel(modelPath, newArgs);
    Treebank testTreebank = null;
    // NOTE(review): the bare string expressions on the following lines look like stripped logging calls.
    if (testTreebankPath != null) {"Reading in trees from " + testTreebankPath);
        if (testTreebankFilter != null) {
  "Filtering on " + testTreebankFilter);
        testTreebank = lexparser.getOp().tlpParams.memoryTreebank();
        testTreebank.loadPath(testTreebankPath, testTreebankFilter);"Read in " + testTreebank.size() + " trees for testing");
    FileWriter out = new FileWriter(outputPath);
    // NOTE(review): `bout` wraps `out` but every visible write goes to `out` directly.
    BufferedWriter bout = new BufferedWriter(out);"Parsing " + testTreebank.size() + " trees");
    // NOTE(review): `count` is tested below but never incremented in the visible lines.
    int count = 0;
    List<ParseRecord> records = Generics.newArrayList();
    for (Tree goldTree : testTreebank) {
        List<Word> tokens = goldTree.yieldWords();
        ParserQuery parserQuery = lexparser.parserQuery();
        if (!parserQuery.parse(tokens)) {
            throw new AssertionError("Could not parse: " + tokens);
        // The query must be a reranking query whose reranker query is a DVModelReranker.Query;
        // the deep trees are read from that object below.
        if (!(parserQuery instanceof RerankingParserQuery)) {
            throw new IllegalArgumentException("Expected a LexicalizedParser with a Reranker attached");
        RerankingParserQuery rpq = (RerankingParserQuery) parserQuery;
        if (!(rpq.rerankerQuery() instanceof DVModelReranker.Query)) {
            throw new IllegalArgumentException("Expected a LexicalizedParser with a DVModel attached");
        // Only the first deep tree returned by the reranker query is used.
        DeepTree tree = ((DVModelReranker.Query) rpq.rerankerQuery()).getDeepTrees().get(0);
        SimpleMatrix rootVector = null;
        // Find the vector stored for the node whose label value is "ROOT".
        for (Map.Entry<Tree, SimpleMatrix> entry : tree.getVectors().entrySet()) {
            if (entry.getKey().label().value().equals("ROOT")) {
                rootVector = entry.getValue();
        if (rootVector == null) {
            throw new AssertionError("Could not find root nodevector");
        // Per-sentence output: tokens, parsed tree, then every element of the root vector.
        out.write(tokens + "\n");
        out.write(tree.getTree() + "\n");
        for (int i = 0; i < rootVector.getNumElements(); ++i) {
            out.write("  " + rootVector.get(i));
        if (count % 10 == 0) {
  "  " + count);
        records.add(new ParseRecord(tokens, goldTree, tree.getTree(), rootVector, tree.getVectors()));
    }"  done parsing");
    // Gather every (node, vector) pair whose node spans at most maxLength leaves.
    List<Pair<Tree, SimpleMatrix>> subtrees = Generics.newArrayList();
    for (ParseRecord record : records) {
        for (Map.Entry<Tree, SimpleMatrix> entry : record.nodeVectors.entrySet()) {
            if (entry.getKey().getLeaves().size() <= maxLength) {
                subtrees.add(Pair.makePair(entry.getKey(), entry.getValue()));
    }"There are " + subtrees.size() + " subtrees in the set of trees");
    // Queue of scored subtree pairs; capacity hint 101 matches the `> 100` size check below.
    PriorityQueue<ScoredObject<Pair<Tree, Tree>>> bestmatches = new PriorityQueue<>(101, ScoredComparator.DESCENDING_COMPARATOR);
    for (int i = 0; i < subtrees.size(); ++i) {;;
        for (int j = 0; j < subtrees.size(); ++j) {
            // NOTE(review): the bodies of this `if` and of the size check below are missing from the listing.
            if (i == j) {
            // TODO: look at basic category?
            // Score of a pair = Frobenius norm of the difference of the two node vectors.
            double normF = subtrees.get(i).second().minus(subtrees.get(j).second()).normF();
            bestmatches.add(new ScoredObject<>(Pair.makePair(subtrees.get(i).first(), subtrees.get(j).first()), normF));
            if (bestmatches.size() > 100) {
        List<ScoredObject<Pair<Tree, Tree>>> ordered = Generics.newArrayList();
        // NOTE(review): the body of this draining loop is missing from the listing.
        while (bestmatches.size() > 0) {
        for (ScoredObject<Pair<Tree, Tree>> pair : ordered) {
  " MATCHED " + pair.object().second.yieldWords() + " ... " + pair.object().second() + " with a score of " + pair.score());
    // Second comparison: score every other record against record i and write the nearest ones.
    for (int i = 0; i < records.size(); ++i) {
      if (i % 10 == 0) {"  " + i);
      List<ScoredObject<ParseRecord>> scored = Generics.newArrayList();
      for (int j = 0; j < records.size(); ++j) {
        if (i == j) continue;

        double score = 0.0;
        // NOTE(review): `matches` is never incremented in the visible lines.
        int matches = 0;
        for (Map.Entry<Tree, SimpleMatrix> first : records.get(i).nodeVectors.entrySet()) {
          for (Map.Entry<Tree, SimpleMatrix> second : records.get(j).nodeVectors.entrySet()) {
            // Only node pairs with the same basic category contribute to the score.
            String firstBasic = dvparser.dvModel.basicCategory(first.getKey().label().value());
            String secondBasic = dvparser.dvModel.basicCategory(second.getKey().label().value());
            if (firstBasic.equals(secondBasic)) {
              double normF = first.getValue().minus(second.getValue()).normF();
              // Accumulate the squared Frobenius norm of the difference.
              score += normF * normF;
        // With no matching node pairs the score is set to +infinity; otherwise it is
        // the mean of the accumulated squared distances.
        if (matches == 0) {
          score = Double.POSITIVE_INFINITY;
        } else {
          score = score / matches;
        //double score = records.get(i).vector.minus(records.get(j).vector).normF();
        scored.add(new ScoredObject<ParseRecord>(records.get(j), score));
      // Sort with the ascending comparator so the smallest scores come first.
      Collections.sort(scored, ScoredComparator.ASCENDING_COMPARATOR);

      out.write(records.get(i).sentence.toString() + "\n");
      // Write the first numNeighbors entries of the sorted list.
      // NOTE(review): no visible guard for scored.size() < numNeighbors.
      for (int j = 0; j < numNeighbors; ++j) {
        out.write("   " + scored.get(j).score() + ": " + scored.get(j).object().sentence + "\n");
Also used : Word(edu.stanford.nlp.ling.Word) RerankingParserQuery(edu.stanford.nlp.parser.lexparser.RerankingParserQuery) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) Treebank(edu.stanford.nlp.trees.Treebank) LexicalizedParser(edu.stanford.nlp.parser.lexparser.LexicalizedParser) FileWriter(java.io.FileWriter) ArrayList(java.util.ArrayList) BufferedWriter(java.io.BufferedWriter) SimpleMatrix(org.ejml.simple.SimpleMatrix) ScoredObject(edu.stanford.nlp.util.ScoredObject) DeepTree(edu.stanford.nlp.trees.DeepTree) Tree(edu.stanford.nlp.trees.Tree) DeepTree(edu.stanford.nlp.trees.DeepTree) FileFilter(java.io.FileFilter) RerankingParserQuery(edu.stanford.nlp.parser.lexparser.RerankingParserQuery) Pair(edu.stanford.nlp.util.Pair) PriorityQueue(java.util.PriorityQueue) IdentityHashMap(java.util.IdentityHashMap) Map(java.util.Map) RerankingParserQuery(edu.stanford.nlp.parser.lexparser.RerankingParserQuery) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery)


ParserQuery (edu.stanford.nlp.parser.common.ParserQuery)12 Tree (edu.stanford.nlp.trees.Tree)6 Pair (edu.stanford.nlp.util.Pair)4 PrintWriter ( ArrayList (java.util.ArrayList)4 RuntimeIOException ( CoreLabel (edu.stanford.nlp.ling.CoreLabel)3 HasWord (edu.stanford.nlp.ling.HasWord)3 Timing (edu.stanford.nlp.util.Timing)3 FileOutputStream ( IOException ( DecimalFormat (java.text.DecimalFormat)3 NumberFormat (java.text.NumberFormat)3 Test (org.junit.Test)3 NullOutputStream ( Word (edu.stanford.nlp.ling.Word)2 ParserConstraint (edu.stanford.nlp.parser.common.ParserConstraint)2 ParsingThreadsafeProcessor (edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor)2 LexicalizedParser (edu.stanford.nlp.parser.lexparser.LexicalizedParser)2 RerankingParserQuery (edu.stanford.nlp.parser.lexparser.RerankingParserQuery)2