use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class LexicalizedParser method loadModelFromZip.
/**
 * Loads a serialized parser that is stored as an entry inside a zip archive.
 *
 * <p>If the entry name ends in {@code .gz}, the entry is gunzipped before it is
 * deserialized. All streams and the zip file itself are closed before this method
 * returns, whether it returns normally or throws.
 *
 * @param zipFilename Path of the zip archive on the local file system.
 * @param modelName Name of the entry inside the archive that holds the model.
 * @return The parser produced by {@code loadModel}, or {@code null} if the archive
 *         contains no entry named {@code modelName}.
 * @throws RuntimeIOException If {@code zipFilename} does not exist (wrapping a
 *         {@link FileNotFoundException}) or if any I/O error occurs while reading.
 */
public static LexicalizedParser loadModelFromZip(String zipFilename, String modelName) {
  try {
    File file = new File(zipFilename);
    if (!file.exists()) {
      throw new FileNotFoundException("Could not find " + modelName + " inside " + zipFilename);
    }
    // try-with-resources guarantees the archive handle is released even when
    // reading or deserializing the entry fails.
    try (ZipFile zin = new ZipFile(file)) {
      ZipEntry zentry = zin.getEntry(modelName);
      if (zentry == null) {
        // A missing entry is reported to the caller as null rather than as an error.
        return null;
      }
      try (InputStream raw = zin.getInputStream(zentry);
           // gunzip it if necessary
           InputStream in = modelName.endsWith(".gz") ? new GZIPInputStream(raw) : raw;
           ObjectInputStream ois = new ObjectInputStream(in)) {
        return loadModel(ois);
      }
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class StanfordCoreNLP method processFiles.
/**
 * A common method for processing a set of files, used in both {@link StanfordCoreNLP} as well as
 * {@link StanfordCoreNLPClient}.
 *
 * <p>For every input file, an output file name is derived from the {@code outputDirectory},
 * {@code inputDirectory}, {@code outputExtension} and {@code replaceExtension} properties.
 * The file is skipped when it is listed in the {@code excludeFiles} file, when the output
 * path equals the input path, or when {@code noClobber} is set and the output already exists.
 * Otherwise the file is read (either as a serialized annotation, for {@code .ser.gz} inputs,
 * or as plain text), handed to {@code annotate}, and the finished annotation is written with
 * {@code print}.
 *
 * <p>The processed / skipped / error counters are maintained, but the code that reported them
 * is currently commented out at the end of the method.
 *
 * @param base The base input directory to process from.
 * @param files The files to process.
 * @param numThreads The number of threads to annotate on. This value is not read by the
 *                   active code of this method.
 * @param properties The properties file to use during annotation.
 *                   This should match the properties file used in the implementation of the annotate function.
 * @param annotate The function used to annotate a document.
 * @param print The function used to print a document.
 * @param outputFormat The output format; it selects the default output file extension.
 * @throws IOException If an I/O error occurs outside the per-file read/annotate step, for
 *                     example while resolving canonical paths.
 * @throws RuntimeIOException If an I/O error occurs while reading an input file or writing
 *                            an output file.
 * @throws IllegalArgumentException If {@code outputFormat} is not one of the handled formats.
 */
protected static void processFiles(String base, final Collection<File> files, int numThreads, Properties properties, BiConsumer<Annotation, Consumer<Annotation>> annotate, BiConsumer<Annotation, OutputStream> print, OutputFormat outputFormat) throws IOException {
  // List<Runnable> toRun = new LinkedList<>();
  // Process properties here
  final String baseOutputDir = properties.getProperty("outputDirectory", ".");
  final String baseInputDir = properties.getProperty("inputDirectory", base);
  // Set of files to exclude
  final String excludeFilesParam = properties.getProperty("excludeFiles");
  final Set<String> excludeFiles = new HashSet<>();
  if (excludeFilesParam != null) {
    Iterable<String> lines = IOUtils.readLines(excludeFilesParam);
    for (String line : lines) {
      String name = line.trim();
      if (!name.isEmpty()) {
        excludeFiles.add(name);
      }
    }
  }
  //(file info)
  final String serializerClass = properties.getProperty("serializer", GenericAnnotationSerializer.class.getName());
  final String inputSerializerClass = properties.getProperty("inputSerializer", serializerClass);
  // Name of the property the input serializer was actually configured through
  final String inputSerializerName = (serializerClass.equals(inputSerializerClass)) ? "serializer" : "inputSerializer";
  String defaultExtension;
  switch (outputFormat) {
    case XML:
      defaultExtension = ".xml";
      break;
    case JSON:
      defaultExtension = ".json";
      break;
    case CONLL:
      defaultExtension = ".conll";
      break;
    case CONLLU:
      defaultExtension = ".conllu";
      break;
    case TEXT:
      defaultExtension = ".out";
      break;
    case SERIALIZED:
      defaultExtension = ".ser.gz";
      break;
    default:
      throw new IllegalArgumentException("Unknown output format " + outputFormat);
  }
  final String extension = properties.getProperty("outputExtension", defaultExtension);
  final boolean replaceExtension = Boolean.parseBoolean(properties.getProperty("replaceExtension", "false"));
  final boolean continueOnAnnotateError = Boolean.parseBoolean(properties.getProperty("continueOnAnnotateError", "false"));
  final boolean noClobber = Boolean.parseBoolean(properties.getProperty("noClobber", "false"));
  // final boolean randomize = Boolean.parseBoolean(properties.getProperty("randomize", "false"));
  final MutableInteger totalProcessed = new MutableInteger(0);
  final MutableInteger totalSkipped = new MutableInteger(0);
  final MutableInteger totalErrorAnnotating = new MutableInteger(0);
  //for each file...
  for (final File file : files) {
    // Determine if there is anything to be done....
    if (excludeFiles.contains(file.getName())) {
      logger.err("Skipping excluded file " + file.getName());
      totalSkipped.incValue(1);
      continue;
    }
    //--Get Output File Info
    //(filename)
    String outputDir = baseOutputDir;
    if (baseInputDir != null) {
      // Get input file name relative to base.
      // File.getParent() returns null for a path with no parent component
      // (e.g. "doc.txt"); treat that as "no relative directory" instead of failing.
      String parent = file.getParent();
      String relDir = (parent == null) ? "" : parent.replaceFirst(Pattern.quote(baseInputDir), "");
      outputDir = outputDir + File.separator + relDir;
    }
    // Make sure output directory exists
    new File(outputDir).mkdirs();
    String outputFilename = new File(outputDir, file.getName()).getPath();
    if (replaceExtension) {
      int lastDot = outputFilename.lastIndexOf('.');
      // for paths like "./zzz", lastDot will be 0
      if (lastDot > 0) {
        outputFilename = outputFilename.substring(0, lastDot);
      }
    }
    // ensure we don't make filenames with doubled extensions like .xml.xml
    if (!outputFilename.endsWith(extension)) {
      outputFilename += extension;
    }
    // normalize filename for the upcoming comparison
    outputFilename = new File(outputFilename).getCanonicalPath();
    // Java 7 will have a Files.isSymbolicLink(file) method
    if (outputFilename.equals(file.getCanonicalPath())) {
      logger.err("Skipping " + file.getName() + ": output file " + outputFilename + " has the same filename as the input file -- assuming you don't actually want to do this.");
      totalSkipped.incValue(1);
      continue;
    }
    if (noClobber && new File(outputFilename).exists()) {
      logger.err("Skipping " + file.getName() + ": output file " + outputFilename + " as it already exists. Don't use the noClobber option to override this.");
      totalSkipped.incValue(1);
      continue;
    }
    final String finalOutputFilename = outputFilename;
    //catching exceptions...
    try {
      // Check whether this file should be skipped again
      if (noClobber && new File(finalOutputFilename).exists()) {
        logger.err("Skipping " + file.getName() + ": output file " + finalOutputFilename + " as it already exists. Don't use the noClobber option to override this.");
        synchronized (totalSkipped) {
          totalSkipped.incValue(1);
        }
        // Skip only this file. A 'return' here would abandon every remaining file.
        continue;
      }
      logger.info("Processing file " + file.getAbsolutePath() + " ... writing to " + finalOutputFilename);
      //--Process File
      Annotation annotation = null;
      if (file.getAbsolutePath().endsWith(".ser.gz")) {
        // maybe they want to continue processing a partially processed annotation
        try {
          // Create serializers
          if (inputSerializerClass != null) {
            AnnotationSerializer inputSerializer = loadSerializer(inputSerializerClass, inputSerializerName, properties);
            InputStream is = new BufferedInputStream(new FileInputStream(file));
            try {
              Pair<Annotation, InputStream> pair = inputSerializer.read(is);
              pair.second.close();
              annotation = pair.first;
            } finally {
              // Release the file handle even when deserialization fails.
              IOUtils.closeIgnoringExceptions(is);
            }
          } else {
            annotation = IOUtils.readObjectFromFile(file);
          }
        } catch (IOException e) {
          // guess that's not what they wanted
          // We hide IOExceptions because ones such as file not
          // found will be thrown again in a moment. Note that
          // we are intentionally letting class cast exceptions
          // and class not found exceptions go through.
        } catch (ClassNotFoundException e) {
          throw new RuntimeException(e);
        }
      }
      //(read file)
      if (annotation == null) {
        // Not a (readable) serialized annotation: treat the file as raw text.
        String encoding = properties.getProperty("encoding", "UTF-8");
        String text = IOUtils.slurpFile(file.getAbsoluteFile(), encoding);
        annotation = new Annotation(text);
      }
      Timing timing = new Timing();
      annotate.accept(annotation, finishedAnnotation -> {
        timing.done(logger, "Annotating file " + file.getAbsoluteFile());
        Throwable ex = finishedAnnotation.get(CoreAnnotations.ExceptionAnnotation.class);
        if (ex == null) {
          // try-with-resources closes the output file even if printing fails.
          try (OutputStream fos = new BufferedOutputStream(new FileOutputStream(finalOutputFilename))) {
            print.accept(finishedAnnotation, fos);
          } catch (IOException e) {
            throw new RuntimeIOException(e);
          }
          synchronized (totalProcessed) {
            totalProcessed.incValue(1);
            if (totalProcessed.intValue() % 1000 == 0) {
              logger.info("Processed " + totalProcessed + " documents");
            }
          }
        } else if (continueOnAnnotateError) {
          logger.err("Error annotating " + file.getAbsoluteFile() + ": " + ex);
          synchronized (totalErrorAnnotating) {
            totalErrorAnnotating.incValue(1);
          }
        } else {
          throw new RuntimeException("Error annotating " + file.getAbsoluteFile(), ex);
        }
      });
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }
  /*
  if (randomize) {
  log("Randomly shuffling input");
  Collections.shuffle(toRun);
  }
  log("Ready to process: " + toRun.size() + " files, skipped " + totalSkipped + ", total " + nFiles);
  //--Run Jobs
  if(numThreads == 1){
  for(Runnable r : toRun){ r.run(); }
  } else {
  Redwood.Util.threadAndRun("StanfordCoreNLP <" + numThreads + " threads>", toRun, numThreads);
  }
  log("Processed " + totalProcessed + " documents");
  log("Skipped " + totalSkipped + " documents, error annotating " + totalErrorAnnotating + " documents");
  */
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class StanfordCoreNLPServer method main.
/**
 * Command-line entry point for the server.
 * Parses the command line arguments, builds a {@link StanfordCoreNLPServer}, optionally
 * pre-loads the configured annotators, and then runs the server.
 *
 * @param args The command line arguments
 *
 * @throws IOException Thrown if we could not start / run the server.
 */
public static void main(String[] args) throws IOException {
  // Startup banner, plus the build identifier when the environment provides one
  log("--- " + StanfordCoreNLPServer.class.getSimpleName() + "#main() called ---");
  final String buildId = System.getenv("BUILD");
  if (buildId != null) {
    log(" Build: " + buildId);
  }
  Runtime.getRuntime().addShutdownHook(new Thread(() -> log("CoreNLP Server is shutting down.")));

  // Static options are filled first ...
  ArgumentParser.fillOptions(StanfordCoreNLPServer.class, args);
  // ... then the server properties are read from the command line (right now only server_id is used) ...
  final Properties serverProperties = StringUtils.argsToProperties(args);
  // ... and the instance is created only after the global options are in place.
  final StanfordCoreNLPServer server = new StanfordCoreNLPServer(serverProperties);
  ArgumentParser.fillOptions(server, args);
  log(" Threads: " + ArgumentParser.threads);

  // Liveness server; the flag starts out false and is also handed to run() below
  final AtomicBoolean live = new AtomicBoolean(false);
  server.livenessServer(live);

  // Handler serving the homepage
  final FileHandler homepage;
  try {
    homepage = new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.html");
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }

  // Pre-load the models when a non-blank annotator list was configured
  final String annotatorsToPreload = StanfordCoreNLPServer.preloadedAnnotators;
  if (annotatorsToPreload != null && !annotatorsToPreload.trim().isEmpty()) {
    final Properties preloadProps = new Properties();
    server.defaultProps.entrySet().forEach(
        kv -> preloadProps.setProperty(kv.getKey().toString(), kv.getValue().toString()));
    preloadProps.setProperty("annotators", annotatorsToPreload);
    try {
      new StanfordCoreNLP(preloadProps);
    } catch (Throwable t) {
      // A failed pre-load is reported but does not stop the server from starting.
      err("Could not pre-load annotators in server; encountered exception:");
      t.printStackTrace();
    }
  }

  // Credentials are only supplied when both a username and a password are set
  final Optional<Pair<String, String>> credentials =
      (server.username != null && server.password != null)
          ? Optional.of(Pair.makePair(server.username, server.password))
          : Optional.empty();

  // Run the server; the ssl flag is forwarded as-is
  log("Starting server...");
  server.run(credentials, req -> true, res -> {
  }, homepage, server.ssl, live);
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class Document method xmlMinified.
/**
 * Like the {@link Document#xml(Function...)} function, but producing minified XML
 * (pretty-printing disabled), which is more suitable for sending over the wire.
 *
 * <p>Each supplied function is applied to the first sentence of the document before
 * the document is converted to an annotation and printed.
 *
 * @param functions The (possibly empty) list of annotations to populate on the document
 *                  before dumping it to XML.
 * @return The XML String for this document, without unnecessary whitespace.
 * @throws RuntimeIOException If the XML outputter reports an I/O error.
 */
@SafeVarargs
public final String xmlMinified(Function<Sentence, Object>... functions) {
  // Trigger each requested annotation; the return values are intentionally discarded.
  for (int i = 0; i < functions.length; i++) {
    functions[i].apply(this.sentence(0));
  }
  try {
    AnnotationOutputter.Options compact = new AnnotationOutputter.Options();
    compact.pretty = false;
    return new XMLOutputter().print(this.asAnnotation(false), compact);
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class Dictionaries method loadCountriesLists.
/**
 * Reads a tab-separated file and adds the lower-cased second column of every line
 * to {@code countries}.
 *
 * <p>Every line is expected to contain at least two tab-separated fields; a line
 * without a second field fails with an {@link ArrayIndexOutOfBoundsException}.
 * The reader is closed whether or not reading succeeds.
 *
 * @param file Location of the list, as understood by {@code IOUtils.readerFromString}.
 * @throws RuntimeIOException If the file cannot be opened or read.
 */
private void loadCountriesLists(String file) {
  // try-with-resources releases the reader even when a read or a malformed line throws.
  try (BufferedReader reader = IOUtils.readerFromString(file)) {
    for (String line; (line = reader.readLine()) != null; ) {
      countries.add(line.split("\t")[1].toLowerCase());
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
Aggregations