Search in sources :

Example 26 with RuntimeIOException

Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

The class LexicalizedParser, method saveParserToTextFile:

/**
 * Saves the parser defined by pd to the given filename in the text grammar
 * format (sections delimited by "BEGIN ..." headers: options, indices,
 * lexicon, unary/binary/dependency grammars).
 * If there is an error, a RuntimeIOException is thrown.
 *
 * @param filename destination path; a ".gz" suffix selects gzip compression
 * @throws UnsupportedOperationException if this parser has a reranker (not serializable as text)
 * @throws RuntimeIOException if any I/O error occurs while writing
 */
// todo: [cdm 2015] This doesn't use character encoding and it should!
public void saveParserToTextFile(String filename) {
    if (reranker != null) {
        throw new UnsupportedOperationException("Sorry, but parsers with rerankers cannot be saved to text file");
    }
    try {
        log.info("Writing parser in text grammar format to file " + filename);
        OutputStream os;
        if (filename.endsWith(".gz")) {
            // it's faster to do the buffering _outside_ the gzipping as here
            os = new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(filename)));
        } else {
            os = new BufferedOutputStream(new FileOutputStream(filename));
        }
        // try-with-resources: previously the stream leaked if any writeData()
        // call threw mid-way, since close() was only reached on success.
        try (PrintWriter out = new PrintWriter(os)) {
            String prefix = "BEGIN ";
            out.println(prefix + "OPTIONS");
            op.writeData(out);
            out.println();
            log.info(".");
            out.println(prefix + "STATE_INDEX");
            stateIndex.saveToWriter(out);
            out.println();
            log.info(".");
            out.println(prefix + "WORD_INDEX");
            wordIndex.saveToWriter(out);
            out.println();
            log.info(".");
            out.println(prefix + "TAG_INDEX");
            tagIndex.saveToWriter(out);
            out.println();
            log.info(".");
            // Record the unknown-word model class so the loader can reconstruct it
            String uwmClazz = ((lex.getUnknownWordModel() == null) ? "null" : lex.getUnknownWordModel().getClass().getCanonicalName());
            out.println(prefix + "LEXICON " + uwmClazz);
            lex.writeData(out);
            out.println();
            log.info(".");
            out.println(prefix + "UNARY_GRAMMAR");
            ug.writeData(out);
            out.println();
            log.info(".");
            out.println(prefix + "BINARY_GRAMMAR");
            bg.writeData(out);
            out.println();
            log.info(".");
            // Dependency grammar section header is written even when dg is null,
            // so the section is always present (possibly empty) for the reader.
            out.println(prefix + "DEPENDENCY_GRAMMAR");
            if (dg != null) {
                dg.writeData(out);
            }
            out.println();
            log.info(".");
            out.flush();
        }
        log.info("done.");
    } catch (IOException e) {
        log.info("Trouble saving parser data to ASCII format.");
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) GZIPOutputStream(java.util.zip.GZIPOutputStream)

Example 27 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

The class ParseFiles, method parseFiles:

/**
 * Parses each input named in {@code args} from {@code argIndex} onward and prints
 * the resulting parses via {@code treePrint}. The filename "-" reads from stdin.
 * Updates the instance counters (numSents, numWords, numFallback, numUnparsable,
 * numNoMemory, numSkipped) as a side effect and prints a timing/accuracy summary.
 *
 * @param args command-line arguments; entries from argIndex on are input files
 * @param argIndex index of the first filename in args
 * @param tokenized if true (and no tokenizerFactory given), input is already tokenized
 * @param tokenizerFactory factory for tokenizing input; may be null
 * @param elementDelimiter if non-null, input is treated as XML and this names the element to parse
 * @param sentenceDelimiter token that separates sentences; may be null
 * @param escaper transform applied to each tokenized sentence; may be null
 * @param tagDelimiter delimiter between word and POS tag in pre-tagged input; may be null
 * @throws RuntimeIOException if stdin cannot be opened or an output file cannot be created
 */
public void parseFiles(String[] args, int argIndex, boolean tokenized, TokenizerFactory<? extends HasWord> tokenizerFactory, String elementDelimiter, String sentenceDelimiter, Function<List<HasWord>, List<HasWord>> escaper, String tagDelimiter) {
    // XML mode is selected solely by the presence of an element delimiter.
    final DocType docType = (elementDelimiter == null) ? DocType.Plain : DocType.XML;
    if (op.testOptions.verbose) {
        if (tokenizerFactory != null)
            pwErr.println("parseFiles: Tokenizer factory is: " + tokenizerFactory);
    }
    final Timing timer = new Timing();
    // Loop over the files
    for (int i = argIndex; i < args.length; i++) {
        final String filename = args[i];
        final DocumentPreprocessor documentPreprocessor;
        if (filename.equals("-")) {
            // "-" means read from standard input using the configured input encoding.
            try {
                documentPreprocessor = new DocumentPreprocessor(IOUtils.readerFromStdin(op.tlpParams.getInputEncoding()), docType);
            } catch (IOException e) {
                throw new RuntimeIOException(e);
            }
        } else {
            documentPreprocessor = new DocumentPreprocessor(filename, docType, op.tlpParams.getInputEncoding());
        }
        // Unused values are null per the main() method invocation below
        // null is the default for these properties
        documentPreprocessor.setSentenceFinalPuncWords(tlp.sentenceFinalPunctuationWords());
        documentPreprocessor.setEscaper(escaper);
        documentPreprocessor.setSentenceDelimiter(sentenceDelimiter);
        documentPreprocessor.setTagDelimiter(tagDelimiter);
        documentPreprocessor.setElementDelimiter(elementDelimiter);
        // A null tokenizer factory with tokenized==true means "don't retokenize".
        if (tokenizerFactory == null)
            documentPreprocessor.setTokenizerFactory((tokenized) ? null : tlp.getTokenizerFactory());
        else
            documentPreprocessor.setTokenizerFactory(tokenizerFactory);
        // Setup the output
        PrintWriter pwo = pwOut;
        if (op.testOptions.writeOutputFiles) {
            String normalizedName = filename;
            try {
                // this will exception if not a URL
                new URL(normalizedName);
                // URLs become filenames by flattening path separators.
                normalizedName = normalizedName.replaceAll("/", "_");
            } catch (MalformedURLException e) {
            // It isn't a URL, so silently ignore
            }
            String ext = (op.testOptions.outputFilesExtension == null) ? "stp" : op.testOptions.outputFilesExtension;
            String fname = normalizedName + '.' + ext;
            if (op.testOptions.outputFilesDirectory != null && !op.testOptions.outputFilesDirectory.isEmpty()) {
                String fseparator = System.getProperty("file.separator");
                if (fseparator == null || fseparator.isEmpty()) {
                    fseparator = "/";
                }
                // Redirect output into the requested directory, keeping only the base name.
                File fnameFile = new File(fname);
                fname = op.testOptions.outputFilesDirectory + fseparator + fnameFile.getName();
            }
            try {
                pwo = op.tlpParams.pw(new FileOutputStream(fname));
            } catch (IOException ioe) {
                throw new RuntimeIOException(ioe);
            }
        }
        treePrint.printHeader(pwo, op.tlpParams.getOutputEncoding());
        pwErr.println("Parsing file: " + filename);
        int num = 0;
        int numProcessed = 0;
        if (op.testOptions.testingThreads != 1) {
            // Multithreaded path: sentences are pushed into the wrapper and results
            // drained in completion order as they become available.
            MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
            for (List<HasWord> sentence : documentPreprocessor) {
                num++;
                numSents++;
                int len = sentence.size();
                numWords += len;
                pwErr.println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.listToString(sentence, true));
                wrapper.put(sentence);
                while (wrapper.peek()) {
                    ParserQuery pq = wrapper.poll();
                    processResults(pq, numProcessed++, pwo);
                }
            }
            // Drain any results still pending after all sentences are submitted.
            wrapper.join();
            while (wrapper.peek()) {
                ParserQuery pq = wrapper.poll();
                processResults(pq, numProcessed++, pwo);
            }
        } else {
            // Single-threaded path: one ParserQuery reused for every sentence.
            ParserQuery pq = pqFactory.parserQuery();
            for (List<HasWord> sentence : documentPreprocessor) {
                num++;
                numSents++;
                int len = sentence.size();
                numWords += len;
                pwErr.println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.listToString(sentence, true));
                pq.parseAndReport(sentence, pwErr);
                processResults(pq, numProcessed++, pwo);
            }
        }
        treePrint.printFooter(pwo);
        // Only close writers we opened ourselves; pwOut belongs to the caller.
        if (op.testOptions.writeOutputFiles)
            pwo.close();
        pwErr.println("Parsed file: " + filename + " [" + num + " sentences].");
    }
    long millis = timer.stop();
    if (summary) {
        // NOTE(review): pcfgLL/depLL/factLL appear to be likelihood accumulators
        // populated elsewhere — confirm against the rest of the class.
        if (pcfgLL != null)
            pcfgLL.display(false, pwErr);
        if (depLL != null)
            depLL.display(false, pwErr);
        if (factLL != null)
            factLL.display(false, pwErr);
    }
    if (saidMemMessage) {
        ParserUtils.printOutOfMemory(pwErr);
    }
    double wordspersec = numWords / (((double) millis) / 1000);
    double sentspersec = numSents / (((double) millis) / 1000);
    // easier way!
    NumberFormat nf = new DecimalFormat("0.00");
    pwErr.println("Parsed " + numWords + " words in " + numSents + " sentences (" + nf.format(wordspersec) + " wds/sec; " + nf.format(sentspersec) + " sents/sec).");
    if (numFallback > 0) {
        pwErr.println("  " + numFallback + " sentences were parsed by fallback to PCFG.");
    }
    if (numUnparsable > 0 || numNoMemory > 0 || numSkipped > 0) {
        pwErr.println("  " + (numUnparsable + numNoMemory + numSkipped) + " sentences were not parsed:");
        if (numUnparsable > 0) {
            pwErr.println("    " + numUnparsable + " were not parsable with non-zero probability.");
        }
        if (numNoMemory > 0) {
            pwErr.println("    " + numNoMemory + " were skipped because of insufficient memory.");
        }
        if (numSkipped > 0) {
            pwErr.println("    " + numSkipped + " were skipped as length 0 or greater than " + op.testOptions.maxLength);
        }
    }
}
Also used : HasWord(edu.stanford.nlp.ling.HasWord) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) MalformedURLException(java.net.MalformedURLException) MulticoreWrapper(edu.stanford.nlp.util.concurrent.MulticoreWrapper) DecimalFormat(java.text.DecimalFormat) IOException(java.io.IOException) URL(java.net.URL) ParsingThreadsafeProcessor(edu.stanford.nlp.parser.common.ParsingThreadsafeProcessor) FileOutputStream(java.io.FileOutputStream) List(java.util.List) Timing(edu.stanford.nlp.util.Timing) DocumentPreprocessor(edu.stanford.nlp.process.DocumentPreprocessor) File(java.io.File) DocType(edu.stanford.nlp.process.DocumentPreprocessor.DocType) PrintWriter(java.io.PrintWriter) ParserQuery(edu.stanford.nlp.parser.common.ParserQuery) NumberFormat(java.text.NumberFormat)

Example 28 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

The class DVParser, method loadModel:

/**
 * Loads a serialized DVParser from the given URL, classpath resource, or file,
 * then applies the given option flags to the loaded parser's options.
 *
 * @param filename URL, classpath resource, or filesystem path of the serialized model
 * @param args option flags to apply to the loaded parser
 * @return the deserialized, configured parser
 * @throws RuntimeIOException if the model cannot be read or its class cannot be found
 */
public static DVParser loadModel(String filename, String[] args) {
    log.info("Loading serialized model from " + filename);
    DVParser dvparser;
    try {
        dvparser = IOUtils.readObjectFromURLOrClasspathOrFileSystem(filename);
        dvparser.op.setOptions(args);
    } catch (IOException | ClassNotFoundException e) {
        // Multi-catch replaces two identical catch blocks; matches the style
        // used by ParserGrammar.loadModel.
        throw new RuntimeIOException(e);
    }
    log.info("... done");
    return dvparser;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException)

Example 29 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

The class QNMinimizer, method minimize:

/**
 * Minimizes {@code dFunction} by a quasi-Newton method (L-BFGS-style history in
 * {@code qn}; OWL-QN pseudo-gradient variant when {@code useOWLQN} is set),
 * starting from {@code initial} and iterating until the convergence/termination
 * criteria tracked by {@code rec} are met or resources are exhausted.
 *
 * @param dFunction differentiable objective to minimize
 * @param functionTolerance tolerance used by the convergence tests and MINPACK line search
 * @param initial starting point; mutated in place and (usually) returned
 * @param maxFunctionEvaluations cap on function evaluations (&lt;= 0 means unlimited)
 * @param qn quasi-Newton history to use; if null (and no presetInfo) a fresh one is created
 * @return the minimizing point found (the best recorded point if terminated on eval improvement)
 * @throws RuntimeIOException if optional file output cannot be opened
 */
public double[] minimize(DiffFunction dFunction, double functionTolerance, double[] initial, int maxFunctionEvaluations, QNInfo qn) {
    if (!quiet) {
        log.info("QNMinimizer called on double function of " + dFunction.domainDimension() + " variables, using " + (mem > 0 ? "M = " + mem : "dynamic settings of M") + '.');
    }
    // Choose the quasi-Newton history: caller-supplied, preset, or freshly built.
    if (qn == null && presetInfo == null) {
        qn = scaleOpt == eScaling.SCALAR ? new ScalarQNInfo(mem) : new DiagonalQNInfo(mem);
        noHistory = true;
    } else if (presetInfo != null) {
        qn = presetInfo;
        noHistory = false;
    } else if (qn != null) {
        noHistory = false;
    }
    // Reset per-run counters and status.
    its = 0;
    fevals = 0;
    success = false;
    // initialize weights
    double[] x = initial;
    // initialize gradient
    double[] rawGrad = new double[x.length];
    double[] newGrad = new double[x.length];
    double[] newX = new double[x.length];
    double[] dir = new double[x.length];
    // initialize function value and gradient (gradient is stored in grad inside
    // evaluateFunction)
    double value = evaluateFunction(dFunction, x, rawGrad);
    double[] grad;
    if (useOWLQN) {
        // OWL-QN: objective includes the L1 penalty and uses a pseudo-gradient.
        double norm = l1NormOWL(x, dFunction);
        value += norm * lambdaOWL;
        // step (1) in Galen & Gao except we are not computing v yet
        grad = pseudoGradientOWL(x, rawGrad, dFunction);
    } else {
        grad = rawGrad;
    }
    PrintWriter outFile = null;
    PrintWriter infoFile = null;
    if (outputToFile) {
        try {
            String baseName = "QN_m" + mem + '_' + lsOpt.toString() + '_' + scaleOpt.toString();
            outFile = new PrintWriter(new FileOutputStream(baseName + ".output"), true);
            infoFile = new PrintWriter(new FileOutputStream(baseName + ".info"), true);
            infoFile.println(dFunction.domainDimension() + "; DomainDimension ");
            infoFile.println(mem + "; memory");
        } catch (IOException e) {
            throw new RuntimeIOException("Caught IOException outputting QN data to file", e);
        }
    }
    Record rec = new Record(monitor, functionTolerance, outFile);
    // sets the original gradient and x. Also stores the monitor.
    rec.start(value, rawGrad, x);
    // Check if max Evaluations and Iterations have been provided.
    maxFevals = (maxFunctionEvaluations > 0) ? maxFunctionEvaluations : Integer.MAX_VALUE;
    if (!quiet) {
        log.info("               An explanation of the output:");
        log.info("Iter           The number of iterations");
        log.info("evals          The number of function evaluations");
        log.info("SCALING        <D> Diagonal scaling was used; <I> Scaled Identity");
        log.info("LINESEARCH     [## M steplength]  Minpack linesearch");
        log.info("                   1-Function value was too high");
        log.info("                   2-Value ok, gradient positive, positive curvature");
        log.info("                   3-Value ok, gradient negative, positive curvature");
        log.info("                   4-Value ok, gradient negative, negative curvature");
        log.info("               [.. B]  Backtracking");
        log.info("VALUE          The current function value");
        log.info("TIME           Total elapsed time");
        log.info("|GNORM|        The current norm of the gradient");
        log.info("{RELNORM}      The ratio of the current to initial gradient norms");
        log.info("AVEIMPROVE     The average improvement / current value");
        log.info("EVALSCORE      The last available eval score");
        log.info(" ");
        log.info("Iter ## evals ## <SCALING> [LINESEARCH] VALUE TIME |GNORM| {RELNORM} AVEIMPROVE EVALSCORE");
    }
    // sb accumulates the one-line-per-iteration progress report described above.
    StringBuilder sb = new StringBuilder(100);
    eState state = eState.CONTINUE;
    // Beginning of the loop.
    do {
        try {
            if (!quiet) {
                log.info(sb.toString());
            }
            sb.setLength(0);
            boolean doEval = (its >= 0 && its >= startEvaluateIters && evaluateIters > 0 && its % evaluateIters == 0);
            its += 1;
            double newValue;
            sb.append("Iter ").append(its).append(" evals ").append(fevals).append(' ');
            // Compute the search direction
            sb.append('<');
            computeDir(dir, grad, x, qn, dFunction, sb);
            sb.append("> ");
            // sanity check dir
            boolean hasNaNDir = false;
            boolean hasNaNGrad = false;
            for (int i = 0; i < dir.length; i++) {
                // x != x is the standard NaN test.
                if (dir[i] != dir[i])
                    hasNaNDir = true;
                if (grad[i] != grad[i])
                    hasNaNGrad = true;
            }
            if (hasNaNDir && !hasNaNGrad) {
                if (!quiet)
                    log.info("(NaN dir likely due to Hessian approx - resetting) ");
                qn.clear();
                // re-compute the search direction
                sb.append('<');
                computeDir(dir, grad, x, qn, dFunction, sb);
                sb.append("> ");
            }
            // perform line search
            sb.append('[');
            // initialized in if/else/switch below
            double[] newPoint;
            if (useOWLQN) {
                // only linear search is allowed for OWL-QN
                newPoint = lineSearchBacktrackOWL(dFunction, dir, x, newX, grad, value, sb);
                sb.append('B');
            } else {
                // switch between line search options.
                switch(lsOpt) {
                    case BACKTRACK:
                        newPoint = lineSearchBacktrack(dFunction, dir, x, newX, grad, value, sb);
                        sb.append('B');
                        break;
                    case MINPACK:
                        newPoint = lineSearchMinPack(dFunction, dir, x, newX, grad, value, functionTolerance, sb);
                        sb.append('M');
                        break;
                    default:
                        throw new IllegalArgumentException("Invalid line search option for QNMinimizer.");
                }
            }
            // NOTE(review): f and a are index constants declared on the class —
            // presumably the function-value and step-length slots of the line
            // search result; confirm against the field declarations.
            newValue = newPoint[f];
            sb.append(' ').append(nf.format(newPoint[a])).append("] ");
            // This shouldn't actually evaluate anything since that should have been
            // done in the lineSearch.
            System.arraycopy(dFunction.derivativeAt(newX), 0, newGrad, 0, newGrad.length);
            // This is where all the s, y updates are applied.
            // step (4) in Galen & Gao 2007
            qn.update(newX, x, newGrad, rawGrad, newPoint[a]);
            if (useOWLQN) {
                System.arraycopy(newGrad, 0, rawGrad, 0, newGrad.length);
                // pseudo gradient
                newGrad = pseudoGradientOWL(newX, newGrad, dFunction);
            }
            double evalScore = Double.NEGATIVE_INFINITY;
            if (doEval) {
                evalScore = doEvaluation(newX);
            }
            // Add the current value and gradient to the records, this also monitors
            // X and writes to output
            rec.add(newValue, newGrad, newX, fevals, evalScore, sb);
            // If you want to call a function and do whatever with the information ...
            if (iterCallbackFunction != null) {
                iterCallbackFunction.callback(newX, its, newValue, newGrad);
            }
            // shift
            value = newValue;
            // double[] temp = x;
            // x = newX;
            // newX = temp;
            System.arraycopy(newX, 0, x, 0, x.length);
            System.arraycopy(newGrad, 0, grad, 0, newGrad.length);
            if (fevals > maxFevals) {
                throw new MaxEvaluationsExceeded("Exceeded in minimize() loop.");
            }
        } catch (SurpriseConvergence s) {
            if (!quiet)
                log.info("QNMinimizer aborted due to surprise convergence");
            break;
        } catch (MaxEvaluationsExceeded m) {
            if (!quiet) {
                log.info("QNMinimizer aborted due to maximum number of function evaluations");
                log.info(m.toString());
                log.info("** This is not an acceptable termination of QNMinimizer, consider");
                log.info("** increasing the max number of evaluations, or safeguarding your");
                log.info("** program by checking the QNMinimizer.wasSuccessful() method.");
            }
            break;
        } catch (OutOfMemoryError oome) {
            // Recovery strategy: shrink the stored history and retry rather than die.
            if (qn.used > 1) {
                qn.removeFirst();
                sb.append("{Caught OutOfMemory, changing m from ").append(qn.mem).append(" to ").append(qn.used).append("}]");
                qn.mem = qn.used;
            } else {
                throw oome;
            }
        }
    } while (// end do while
    // Loop condition both advances and records the termination state.
    (state = rec.toContinue(sb)) == eState.CONTINUE);
    if (evaluateIters > 0) {
        // do final evaluation
        double evalScore = (useEvalImprovement ? doEvaluation(rec.getBest()) : doEvaluation(x));
        if (!quiet)
            log.info("final evalScore is: " + evalScore);
    }
    // Report why we stopped; all TERMINATE_* states count as success.
    switch(state) {
        case TERMINATE_GRADNORM:
            if (!quiet)
                log.info("QNMinimizer terminated due to numerically zero gradient: |g| < EPS  max(1,|x|) ");
            success = true;
            break;
        case TERMINATE_RELATIVENORM:
            if (!quiet)
                log.info("QNMinimizer terminated due to sufficient decrease in gradient norms: |g|/|g0| < TOL ");
            success = true;
            break;
        case TERMINATE_AVERAGEIMPROVE:
            if (!quiet)
                log.info("QNMinimizer terminated due to average improvement: | newest_val - previous_val | / |newestVal| < TOL ");
            success = true;
            break;
        case TERMINATE_MAXITR:
            if (!quiet)
                log.info("QNMinimizer terminated due to reached max iteration " + maxItr);
            success = true;
            break;
        case TERMINATE_EVALIMPROVE:
            if (!quiet)
                log.info("QNMinimizer terminated due to no improvement on eval ");
            success = true;
            // Return the best point seen, not the last iterate.
            x = rec.getBest();
            break;
        default:
            log.warn("QNMinimizer terminated without converging");
            success = false;
            break;
    }
    double completionTime = rec.howLong();
    if (!quiet)
        log.info("Total time spent in optimization: " + nfsec.format(completionTime) + 's');
    if (outputToFile) {
        infoFile.println(completionTime + "; Total Time ");
        infoFile.println(fevals + "; Total evaluations");
        infoFile.close();
        outFile.close();
    }
    qn.free();
    return x;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException) FileOutputStream(java.io.FileOutputStream) PrintWriter(java.io.PrintWriter)

Example 30 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

The class ParserGrammar, method loadModel:

/**
 * Loads a serialized ParserGrammar from a URL, classpath resource, or file,
 * logging how long deserialization took, and then applies any extra option
 * flags to the loaded parser.
 *
 * @param path URL, classpath resource, or filesystem path of the serialized parser
 * @param extraFlags option flags to set on the parser after loading (may be empty)
 * @return the deserialized, configured parser
 * @throws RuntimeIOException if the parser cannot be read or its class cannot be found
 */
public static ParserGrammar loadModel(String path, String... extraFlags) {
    ParserGrammar loaded;
    try {
        Timing loadTimer = new Timing();
        loaded = IOUtils.readObjectFromURLOrClasspathOrFileSystem(path);
        loadTimer.done(logger, "Loading parser from serialized file " + path);
    } catch (IOException | ClassNotFoundException e) {
        // Both failure modes surface uniformly as a RuntimeIOException.
        throw new RuntimeIOException(e);
    }
    if (extraFlags.length != 0) {
        loaded.setOptionFlags(extraFlags);
    }
    return loaded;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) Timing(edu.stanford.nlp.util.Timing) IOException(java.io.IOException)

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)114 IOException (java.io.IOException)61 BufferedReader (java.io.BufferedReader)22 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)12 CoreLabel (edu.stanford.nlp.ling.CoreLabel)11 File (java.io.File)9 ArrayList (java.util.ArrayList)7 Tree (edu.stanford.nlp.trees.Tree)6 CoreMap (edu.stanford.nlp.util.CoreMap)5 BufferedWriter (java.io.BufferedWriter)5 Properties (java.util.Properties)5 Timing (edu.stanford.nlp.util.Timing)4 FileNotFoundException (java.io.FileNotFoundException)4 FileOutputStream (java.io.FileOutputStream)4 ObjectOutputStream (java.io.ObjectOutputStream)4 PrintWriter (java.io.PrintWriter)4 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)3 Annotation (edu.stanford.nlp.pipeline.Annotation)3 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)3 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)3