Use of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project by stanfordnlp.
Example: the method saveParserToTextFile of the class LexicalizedParser.
/**
 * Saves the parser defined by pd to the given filename in text grammar format.
 * Output is now written in an explicit UTF-8 encoding rather than the platform
 * default (resolving the old [cdm 2015] todo); any reader of this format must
 * decode with UTF-8 as well. A filename ending in ".gz" is gzip-compressed.
 * If there is an error, a RuntimeIOException is thrown.
 *
 * @param filename file to write the grammar to
 * @throws UnsupportedOperationException if this parser has a reranker, which
 *         cannot be represented in the text format
 * @throws RuntimeIOException if any I/O error occurs while writing
 */
public void saveParserToTextFile(String filename) {
  if (reranker != null) {
    throw new UnsupportedOperationException("Sorry, but parsers with rerankers cannot be saved to text file");
  }
  log.info("Writing parser in text grammar format to file " + filename);
  try {
    OutputStream os;
    if (filename.endsWith(".gz")) {
      // it's faster to do the buffering _outside_ the gzipping as here
      os = new BufferedOutputStream(new GZIPOutputStream(new FileOutputStream(filename)));
    } else {
      os = new BufferedOutputStream(new FileOutputStream(filename));
    }
    // try-with-resources so the stream is closed (and not leaked) even if a
    // writeData call throws part-way through.
    try (PrintWriter out = new PrintWriter(new java.io.OutputStreamWriter(os, java.nio.charset.StandardCharsets.UTF_8))) {
      String prefix = "BEGIN ";
      out.println(prefix + "OPTIONS");
      op.writeData(out);
      out.println();
      log.info(".");
      out.println(prefix + "STATE_INDEX");
      stateIndex.saveToWriter(out);
      out.println();
      log.info(".");
      out.println(prefix + "WORD_INDEX");
      wordIndex.saveToWriter(out);
      out.println();
      log.info(".");
      out.println(prefix + "TAG_INDEX");
      tagIndex.saveToWriter(out);
      out.println();
      log.info(".");
      // Record which unknown-word model the lexicon uses so it can be
      // reconstructed when the text grammar is loaded.
      String uwmClazz = ((lex.getUnknownWordModel() == null) ? "null" : lex.getUnknownWordModel().getClass().getCanonicalName());
      out.println(prefix + "LEXICON " + uwmClazz);
      lex.writeData(out);
      out.println();
      log.info(".");
      out.println(prefix + "UNARY_GRAMMAR");
      ug.writeData(out);
      out.println();
      log.info(".");
      out.println(prefix + "BINARY_GRAMMAR");
      bg.writeData(out);
      out.println();
      log.info(".");
      out.println(prefix + "DEPENDENCY_GRAMMAR");
      // dg may be null for PCFG-only parsers; the section header is still written
      if (dg != null) {
        dg.writeData(out);
      }
      out.println();
      log.info(".");
      out.flush();
    }
    log.info("done.");
  } catch (IOException e) {
    log.info("Trouble saving parser data to ASCII format.");
    throw new RuntimeIOException(e);
  }
}
Use of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project by stanfordnlp.
Example: the method parseFiles of the class ParseFiles.
/**
 * Parses each file named in args[argIndex..] and prints the resulting trees
 * via treePrint. A filename of "-" reads from stdin. Output goes to pwOut,
 * or to a per-input output file when op.testOptions.writeOutputFiles is set.
 * Parsing runs on op.testOptions.testingThreads threads when that value is
 * not 1, otherwise single-threaded. Per-file progress and overall statistics
 * (words/sec, unparsable counts) are reported on pwErr.
 *
 * @param args command-line arguments; filenames start at argIndex
 * @param argIndex index in args of the first filename
 * @param tokenized whether the input text is already tokenized
 * @param tokenizerFactory tokenizer to use; null falls back to the treebank
 *        default (unless tokenized is true, in which case none is used)
 * @param elementDelimiter if non-null, input is XML and text is taken from this element
 * @param sentenceDelimiter delimiter between sentences, or null for automatic detection
 * @param escaper transformation applied to each sentence's words, or null
 * @param tagDelimiter delimiter between word and POS tag in pre-tagged input, or null
 */
public void parseFiles(String[] args, int argIndex, boolean tokenized, TokenizerFactory<? extends HasWord> tokenizerFactory, String elementDelimiter, String sentenceDelimiter, Function<List<HasWord>, List<HasWord>> escaper, String tagDelimiter) {
  // XML mode iff an element delimiter was supplied
  final DocType docType = (elementDelimiter == null) ? DocType.Plain : DocType.XML;
  if (op.testOptions.verbose) {
    if (tokenizerFactory != null)
      pwErr.println("parseFiles: Tokenizer factory is: " + tokenizerFactory);
  }
  final Timing timer = new Timing();
  // Loop over the files
  for (int i = argIndex; i < args.length; i++) {
    final String filename = args[i];
    final DocumentPreprocessor documentPreprocessor;
    if (filename.equals("-")) {
      try {
        documentPreprocessor = new DocumentPreprocessor(IOUtils.readerFromStdin(op.tlpParams.getInputEncoding()), docType);
      } catch (IOException e) {
        throw new RuntimeIOException(e);
      }
    } else {
      documentPreprocessor = new DocumentPreprocessor(filename, docType, op.tlpParams.getInputEncoding());
    }
    // Unused values are null per the main() method invocation below
    // null is the default for these properties
    documentPreprocessor.setSentenceFinalPuncWords(tlp.sentenceFinalPunctuationWords());
    documentPreprocessor.setEscaper(escaper);
    documentPreprocessor.setSentenceDelimiter(sentenceDelimiter);
    documentPreprocessor.setTagDelimiter(tagDelimiter);
    documentPreprocessor.setElementDelimiter(elementDelimiter);
    if (tokenizerFactory == null)
      documentPreprocessor.setTokenizerFactory((tokenized) ? null : tlp.getTokenizerFactory());
    else
      documentPreprocessor.setTokenizerFactory(tokenizerFactory);
    // Setup the output
    PrintWriter pwo = pwOut;
    if (op.testOptions.writeOutputFiles) {
      String normalizedName = filename;
      try {
        // this will exception if not a URL
        new URL(normalizedName);
        // flatten URL path separators so the result is a single file name
        normalizedName = normalizedName.replaceAll("/", "_");
      } catch (MalformedURLException e) {
        // It isn't a URL, so silently ignore
      }
      String ext = (op.testOptions.outputFilesExtension == null) ? "stp" : op.testOptions.outputFilesExtension;
      String fname = normalizedName + '.' + ext;
      if (op.testOptions.outputFilesDirectory != null && !op.testOptions.outputFilesDirectory.isEmpty()) {
        String fseparator = System.getProperty("file.separator");
        if (fseparator == null || fseparator.isEmpty()) {
          fseparator = "/";
        }
        File fnameFile = new File(fname);
        fname = op.testOptions.outputFilesDirectory + fseparator + fnameFile.getName();
      }
      try {
        pwo = op.tlpParams.pw(new FileOutputStream(fname));
      } catch (IOException ioe) {
        throw new RuntimeIOException(ioe);
      }
    }
    treePrint.printHeader(pwo, op.tlpParams.getOutputEncoding());
    pwErr.println("Parsing file: " + filename);
    int num = 0;
    int numProcessed = 0;
    if (op.testOptions.testingThreads != 1) {
      // Multi-threaded: feed sentences into a MulticoreWrapper and drain
      // completed parses in order as they become available.
      MulticoreWrapper<List<? extends HasWord>, ParserQuery> wrapper = new MulticoreWrapper<>(op.testOptions.testingThreads, new ParsingThreadsafeProcessor(pqFactory, pwErr));
      for (List<HasWord> sentence : documentPreprocessor) {
        num++;
        numSents++;
        int len = sentence.size();
        numWords += len;
        pwErr.println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.listToString(sentence, true));
        wrapper.put(sentence);
        while (wrapper.peek()) {
          ParserQuery pq = wrapper.poll();
          processResults(pq, numProcessed++, pwo);
        }
      }
      wrapper.join();
      // drain results still pending after the last sentence was submitted
      while (wrapper.peek()) {
        ParserQuery pq = wrapper.poll();
        processResults(pq, numProcessed++, pwo);
      }
    } else {
      // Single-threaded: one ParserQuery is reused for every sentence.
      ParserQuery pq = pqFactory.parserQuery();
      for (List<HasWord> sentence : documentPreprocessor) {
        num++;
        numSents++;
        int len = sentence.size();
        numWords += len;
        pwErr.println("Parsing [sent. " + num + " len. " + len + "]: " + SentenceUtils.listToString(sentence, true));
        pq.parseAndReport(sentence, pwErr);
        processResults(pq, numProcessed++, pwo);
      }
    }
    treePrint.printFooter(pwo);
    if (op.testOptions.writeOutputFiles)
      pwo.close();
    pwErr.println("Parsed file: " + filename + " [" + num + " sentences].");
  }
  long millis = timer.stop();
  // Optional per-model log-likelihood summaries
  if (summary) {
    if (pcfgLL != null)
      pcfgLL.display(false, pwErr);
    if (depLL != null)
      depLL.display(false, pwErr);
    if (factLL != null)
      factLL.display(false, pwErr);
  }
  if (saidMemMessage) {
    ParserUtils.printOutOfMemory(pwErr);
  }
  double wordspersec = numWords / (((double) millis) / 1000);
  double sentspersec = numSents / (((double) millis) / 1000);
  // easier way!
  NumberFormat nf = new DecimalFormat("0.00");
  pwErr.println("Parsed " + numWords + " words in " + numSents + " sentences (" + nf.format(wordspersec) + " wds/sec; " + nf.format(sentspersec) + " sents/sec).");
  if (numFallback > 0) {
    pwErr.println(" " + numFallback + " sentences were parsed by fallback to PCFG.");
  }
  if (numUnparsable > 0 || numNoMemory > 0 || numSkipped > 0) {
    pwErr.println(" " + (numUnparsable + numNoMemory + numSkipped) + " sentences were not parsed:");
    if (numUnparsable > 0) {
      pwErr.println(" " + numUnparsable + " were not parsable with non-zero probability.");
    }
    if (numNoMemory > 0) {
      pwErr.println(" " + numNoMemory + " were skipped because of insufficient memory.");
    }
    if (numSkipped > 0) {
      pwErr.println(" " + numSkipped + " were skipped as length 0 or greater than " + op.testOptions.maxLength);
    }
  }
}
Use of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project by stanfordnlp.
Example: the method loadModel of the class DVParser.
/**
 * Loads a serialized DVParser from the given filename (which may be a URL,
 * a classpath resource, or a file-system path) and then applies the given
 * command-line option flags to the loaded parser.
 *
 * @param filename where to read the serialized model from
 * @param args extra option flags applied to the loaded parser's options
 * @return the deserialized, reconfigured parser
 * @throws RuntimeIOException if the model cannot be read or its class is not found
 */
public static DVParser loadModel(String filename, String[] args) {
  log.info("Loading serialized model from " + filename);
  DVParser dvparser;
  try {
    dvparser = IOUtils.readObjectFromURLOrClasspathOrFileSystem(filename);
    dvparser.op.setOptions(args);
  } catch (IOException | ClassNotFoundException e) {
    // Multi-catch: both failure modes are wrapped identically, matching the
    // style of ParserGrammar.loadModel.
    throw new RuntimeIOException(e);
  }
  log.info("... done");
  return dvparser;
}
Use of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project by stanfordnlp.
Example: the method minimize of the class QNMinimizer.
/**
 * Minimizes dFunction with a quasi-Newton method (L-BFGS, or OWL-QN when
 * useOWLQN is set, following Andrew &amp; Gao 2007), starting from
 * {@code initial}.  Iterates: compute search direction from the QN history,
 * line-search along it, update the history, and record progress, until the
 * convergence criteria in {@code rec.toContinue} stop the loop or the
 * function-evaluation budget is exhausted.
 *
 * @param dFunction the differentiable function to minimize
 * @param functionTolerance relative tolerance used by the convergence tests
 * @param initial the starting point (NOTE: this array is mutated in place and
 *        is usually the returned array)
 * @param maxFunctionEvaluations cap on function evaluations; values &lt;= 0
 *        mean unlimited
 * @param qn quasi-Newton history to resume from, or null to start fresh
 * @return the minimizing point found
 * @throws RuntimeIOException if outputToFile is set and the log files cannot be opened
 */
public double[] minimize(DiffFunction dFunction, double functionTolerance, double[] initial, int maxFunctionEvaluations, QNInfo qn) {
  if (!quiet) {
    log.info("QNMinimizer called on double function of " + dFunction.domainDimension() + " variables, using " + (mem > 0 ? "M = " + mem : "dynamic settings of M") + '.');
  }
  // Choose the QN history: fresh (scalar or diagonal scaling) unless a preset
  // or caller-supplied history is available to resume from.
  if (qn == null && presetInfo == null) {
    qn = scaleOpt == eScaling.SCALAR ? new ScalarQNInfo(mem) : new DiagonalQNInfo(mem);
    noHistory = true;
  } else if (presetInfo != null) {
    qn = presetInfo;
    noHistory = false;
  } else if (qn != null) {
    noHistory = false;
  }
  its = 0;
  fevals = 0;
  success = false;
  // initialize weights
  double[] x = initial;
  // initialize gradient
  double[] rawGrad = new double[x.length];
  double[] newGrad = new double[x.length];
  double[] newX = new double[x.length];
  double[] dir = new double[x.length];
  // initialize function value and gradient (gradient is stored in grad inside
  // evaluateFunction)
  double value = evaluateFunction(dFunction, x, rawGrad);
  double[] grad;
  if (useOWLQN) {
    // OWL-QN: objective includes the weighted L1 norm, and the search uses the
    // pseudo-gradient instead of the raw gradient.
    double norm = l1NormOWL(x, dFunction);
    value += norm * lambdaOWL;
    // step (1) in Galen & Gao except we are not computing v yet
    grad = pseudoGradientOWL(x, rawGrad, dFunction);
  } else {
    grad = rawGrad;
  }
  PrintWriter outFile = null;
  PrintWriter infoFile = null;
  if (outputToFile) {
    try {
      String baseName = "QN_m" + mem + '_' + lsOpt.toString() + '_' + scaleOpt.toString();
      outFile = new PrintWriter(new FileOutputStream(baseName + ".output"), true);
      infoFile = new PrintWriter(new FileOutputStream(baseName + ".info"), true);
      infoFile.println(dFunction.domainDimension() + "; DomainDimension ");
      infoFile.println(mem + "; memory");
    } catch (IOException e) {
      throw new RuntimeIOException("Caught IOException outputting QN data to file", e);
    }
  }
  Record rec = new Record(monitor, functionTolerance, outFile);
  // sets the original gradient and x. Also stores the monitor.
  rec.start(value, rawGrad, x);
  // Check if max Evaluations and Iterations have been provided.
  maxFevals = (maxFunctionEvaluations > 0) ? maxFunctionEvaluations : Integer.MAX_VALUE;
  // Legend for the per-iteration progress lines printed below.
  if (!quiet) {
    log.info(" An explanation of the output:");
    log.info("Iter The number of iterations");
    log.info("evals The number of function evaluations");
    log.info("SCALING <D> Diagonal scaling was used; <I> Scaled Identity");
    log.info("LINESEARCH [## M steplength] Minpack linesearch");
    log.info(" 1-Function value was too high");
    log.info(" 2-Value ok, gradient positive, positive curvature");
    log.info(" 3-Value ok, gradient negative, positive curvature");
    log.info(" 4-Value ok, gradient negative, negative curvature");
    log.info(" [.. B] Backtracking");
    log.info("VALUE The current function value");
    log.info("TIME Total elapsed time");
    log.info("|GNORM| The current norm of the gradient");
    log.info("{RELNORM} The ratio of the current to initial gradient norms");
    log.info("AVEIMPROVE The average improvement / current value");
    log.info("EVALSCORE The last available eval score");
    log.info(" ");
    log.info("Iter ## evals ## <SCALING> [LINESEARCH] VALUE TIME |GNORM| {RELNORM} AVEIMPROVE EVALSCORE");
  }
  StringBuilder sb = new StringBuilder(100);
  eState state = eState.CONTINUE;
  // Beginning of the loop.
  do {
    try {
      if (!quiet) {
        log.info(sb.toString());
      }
      sb.setLength(0);
      boolean doEval = (its >= 0 && its >= startEvaluateIters && evaluateIters > 0 && its % evaluateIters == 0);
      its += 1;
      double newValue;
      sb.append("Iter ").append(its).append(" evals ").append(fevals).append(' ');
      // Compute the search direction
      sb.append('<');
      computeDir(dir, grad, x, qn, dFunction, sb);
      sb.append("> ");
      // sanity check dir
      boolean hasNaNDir = false;
      boolean hasNaNGrad = false;
      for (int i = 0; i < dir.length; i++) {
        // (v != v) is true only when v is NaN
        if (dir[i] != dir[i])
          hasNaNDir = true;
        if (grad[i] != grad[i])
          hasNaNGrad = true;
      }
      // NaN direction with a finite gradient means the Hessian approximation
      // went bad: clear the history and retry with a steepest-descent-like step.
      if (hasNaNDir && !hasNaNGrad) {
        if (!quiet)
          log.info("(NaN dir likely due to Hessian approx - resetting) ");
        qn.clear();
        // re-compute the search direction
        sb.append('<');
        computeDir(dir, grad, x, qn, dFunction, sb);
        sb.append("> ");
      }
      // perform line search
      sb.append('[');
      // initialized in if/else/switch below
      double[] newPoint;
      if (useOWLQN) {
        // only linear search is allowed for OWL-QN
        newPoint = lineSearchBacktrackOWL(dFunction, dir, x, newX, grad, value, sb);
        sb.append('B');
      } else {
        // switch between line search options.
        switch(lsOpt) {
        case BACKTRACK:
          newPoint = lineSearchBacktrack(dFunction, dir, x, newX, grad, value, sb);
          sb.append('B');
          break;
        case MINPACK:
          newPoint = lineSearchMinPack(dFunction, dir, x, newX, grad, value, functionTolerance, sb);
          sb.append('M');
          break;
        default:
          throw new IllegalArgumentException("Invalid line search option for QNMinimizer.");
        }
      }
      // newPoint packs the line-search result; f and a index the function
      // value and the accepted step length respectively.
      newValue = newPoint[f];
      sb.append(' ').append(nf.format(newPoint[a])).append("] ");
      // This shouldn't actually evaluate anything since that should have been
      // done in the lineSearch.
      System.arraycopy(dFunction.derivativeAt(newX), 0, newGrad, 0, newGrad.length);
      // This is where all the s, y updates are applied.
      // step (4) in Galen & Gao 2007
      qn.update(newX, x, newGrad, rawGrad, newPoint[a]);
      if (useOWLQN) {
        System.arraycopy(newGrad, 0, rawGrad, 0, newGrad.length);
        // pseudo gradient
        newGrad = pseudoGradientOWL(newX, newGrad, dFunction);
      }
      double evalScore = Double.NEGATIVE_INFINITY;
      if (doEval) {
        evalScore = doEvaluation(newX);
      }
      // Add the current value and gradient to the records, this also monitors
      // X and writes to output
      rec.add(newValue, newGrad, newX, fevals, evalScore, sb);
      // If you want to call a function and do whatever with the information ...
      if (iterCallbackFunction != null) {
        iterCallbackFunction.callback(newX, its, newValue, newGrad);
      }
      // shift
      value = newValue;
      // double[] temp = x;
      // x = newX;
      // newX = temp;
      System.arraycopy(newX, 0, x, 0, x.length);
      System.arraycopy(newGrad, 0, grad, 0, newGrad.length);
      if (fevals > maxFevals) {
        throw new MaxEvaluationsExceeded("Exceeded in minimize() loop.");
      }
    } catch (SurpriseConvergence s) {
      if (!quiet)
        log.info("QNMinimizer aborted due to surprise convergence");
      break;
    } catch (MaxEvaluationsExceeded m) {
      if (!quiet) {
        log.info("QNMinimizer aborted due to maximum number of function evaluations");
        log.info(m.toString());
        log.info("** This is not an acceptable termination of QNMinimizer, consider");
        log.info("** increasing the max number of evaluations, or safeguarding your");
        log.info("** program by checking the QNMinimizer.wasSuccessful() method.");
      }
      break;
    } catch (OutOfMemoryError oome) {
      // Out of memory: shrink the stored history (m) and keep iterating when
      // possible; rethrow only if there is nothing left to drop.
      if (qn.used > 1) {
        qn.removeFirst();
        sb.append("{Caught OutOfMemory, changing m from ").append(qn.mem).append(" to ").append(qn.used).append("}]");
        qn.mem = qn.used;
      } else {
        throw oome;
      }
    }
  } while (// end do while
  (state = rec.toContinue(sb)) == eState.CONTINUE);
  if (evaluateIters > 0) {
    // do final evaluation
    double evalScore = (useEvalImprovement ? doEvaluation(rec.getBest()) : doEvaluation(x));
    if (!quiet)
      log.info("final evalScore is: " + evalScore);
  }
  // Report why the loop terminated; every explicit termination state counts as
  // success except the default (gave up without converging).
  switch(state) {
  case TERMINATE_GRADNORM:
    if (!quiet)
      log.info("QNMinimizer terminated due to numerically zero gradient: |g| < EPS max(1,|x|) ");
    success = true;
    break;
  case TERMINATE_RELATIVENORM:
    if (!quiet)
      log.info("QNMinimizer terminated due to sufficient decrease in gradient norms: |g|/|g0| < TOL ");
    success = true;
    break;
  case TERMINATE_AVERAGEIMPROVE:
    if (!quiet)
      log.info("QNMinimizer terminated due to average improvement: | newest_val - previous_val | / |newestVal| < TOL ");
    success = true;
    break;
  case TERMINATE_MAXITR:
    if (!quiet)
      log.info("QNMinimizer terminated due to reached max iteration " + maxItr);
    success = true;
    break;
  case TERMINATE_EVALIMPROVE:
    if (!quiet)
      log.info("QNMinimizer terminated due to no improvement on eval ");
    success = true;
    // return the best point seen by the evaluator, not the last iterate
    x = rec.getBest();
    break;
  default:
    log.warn("QNMinimizer terminated without converging");
    success = false;
    break;
  }
  double completionTime = rec.howLong();
  if (!quiet)
    log.info("Total time spent in optimization: " + nfsec.format(completionTime) + 's');
  if (outputToFile) {
    infoFile.println(completionTime + "; Total Time ");
    infoFile.println(fevals + "; Total evaluations");
    infoFile.close();
    outFile.close();
  }
  qn.free();
  return x;
}
Use of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project by stanfordnlp.
Example: the method loadModel of the class ParserGrammar.
/**
 * Reads a serialized ParserGrammar from the given path, which may be a URL,
 * classpath resource, or file-system path, then applies any extra option
 * flags to the loaded parser before returning it.
 *
 * @param path where to read the serialized parser from
 * @param extraFlags option flags to set on the loaded parser (may be empty)
 * @return the deserialized, reconfigured parser
 * @throws RuntimeIOException if reading or deserialization fails
 */
public static ParserGrammar loadModel(String path, String... extraFlags) {
  Timing loadTimer = new Timing();
  ParserGrammar loaded;
  try {
    loaded = IOUtils.readObjectFromURLOrClasspathOrFileSystem(path);
  } catch (IOException | ClassNotFoundException cause) {
    throw new RuntimeIOException(cause);
  }
  // Only reached on success, so the timing report matches the original flow.
  loadTimer.done(logger, "Loading parser from serialized file " + path);
  if (extraFlags.length > 0) {
    loaded.setOptionFlags(extraFlags);
  }
  return loaded;
}
Aggregations