Search in sources :

Example 61 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class LexicalizedParser method loadModelFromZip.

/**
 * Loads a serialized parser model stored as an entry inside a zip archive.
 *
 * @param zipFilename Path of the zip archive on disk.
 * @param modelName Name of the entry inside the archive. If it ends with
 *                  {@code .gz} the entry is gunzipped before being deserialized.
 * @return The parser produced by {@code loadModel}, or {@code null} if the
 *         archive exists but contains no entry named {@code modelName}.
 * @throws RuntimeIOException if the archive does not exist or any I/O error
 *                            occurs while reading it.
 */
public static LexicalizedParser loadModelFromZip(String zipFilename, String modelName) {
    try {
        File file = new File(zipFilename);
        if (!file.exists()) {
            // Thrown inside the try so that it is wrapped like every other I/O failure.
            throw new FileNotFoundException("Could not find " + zipFilename + " (while looking for " + modelName + ")");
        }
        // try-with-resources guarantees the archive is closed even when
        // reading or deserializing the entry fails part-way through.
        try (ZipFile zin = new ZipFile(file)) {
            ZipEntry zentry = zin.getEntry(modelName);
            if (zentry == null) {
                return null;
            }
            InputStream raw = zin.getInputStream(zentry);
            // gunzip it if necessary
            try (InputStream in = modelName.endsWith(".gz") ? new GZIPInputStream(raw) : raw;
                 ObjectInputStream ois = new ObjectInputStream(in)) {
                return loadModel(ois);
            }
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used : GZIPInputStream(java.util.zip.GZIPInputStream) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) ZipFile(java.util.zip.ZipFile) GZIPInputStream(java.util.zip.GZIPInputStream) ZipEntry(java.util.zip.ZipEntry) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) ZipFile(java.util.zip.ZipFile)

Example 62 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class StanfordCoreNLP method processFiles.

/**
   * A common method for processing a set of files, used in both {@link StanfordCoreNLP} as well as
   * {@link StanfordCoreNLPClient}.
   *
   * For every input file this computes an output filename (output directory, optional
   * extension replacement, appended extension), skips the file if it is excluded, would
   * overwrite its own input, or already exists while {@code noClobber} is set, and otherwise
   * hands the annotation to {@code annotate} with a callback that writes the result via
   * {@code print}.
   *
   * @param base The base input directory to process from.
   * @param files The files to process.
   * @param numThreads The number of threads to annotate on. Not read by this method body.
   * @param properties The properties file to use during annotation.
   *                   This should match the properties file used in the implementation of the annotate function.
   * @param annotate The function used to annotate a document.
   * @param print The function used to print a document.
   * @param outputFormat The output format; it selects the default output file extension.
   * @throws IOException If the canonical path of an input or output file cannot be resolved.
   * @throws IllegalArgumentException If {@code outputFormat} is not one of the handled formats.
   * @throws RuntimeIOException If reading an input file or writing an output file fails.
   */
protected static void processFiles(String base, final Collection<File> files, int numThreads, Properties properties, BiConsumer<Annotation, Consumer<Annotation>> annotate, BiConsumer<Annotation, OutputStream> print, OutputFormat outputFormat) throws IOException {
    // List<Runnable> toRun = new LinkedList<>();
    // Process properties here
    final String baseOutputDir = properties.getProperty("outputDirectory", ".");
    final String baseInputDir = properties.getProperty("inputDirectory", base);
    // Set of files to exclude
    final String excludeFilesParam = properties.getProperty("excludeFiles");
    final Set<String> excludeFiles = new HashSet<>();
    if (excludeFilesParam != null) {
        Iterable<String> lines = IOUtils.readLines(excludeFilesParam);
        for (String line : lines) {
            String name = line.trim();
            if (!name.isEmpty()) {
                excludeFiles.add(name);
            }
        }
    }
    //(file info)
    final String serializerClass = properties.getProperty("serializer", GenericAnnotationSerializer.class.getName());
    final String inputSerializerClass = properties.getProperty("inputSerializer", serializerClass);
    final String inputSerializerName = (serializerClass.equals(inputSerializerClass)) ? "serializer" : "inputSerializer";
    String defaultExtension;
    switch(outputFormat) {
        case XML:
            defaultExtension = ".xml";
            break;
        case JSON:
            defaultExtension = ".json";
            break;
        case CONLL:
            defaultExtension = ".conll";
            break;
        case CONLLU:
            defaultExtension = ".conllu";
            break;
        case TEXT:
            defaultExtension = ".out";
            break;
        case SERIALIZED:
            defaultExtension = ".ser.gz";
            break;
        default:
            throw new IllegalArgumentException("Unknown output format " + outputFormat);
    }
    final String extension = properties.getProperty("outputExtension", defaultExtension);
    final boolean replaceExtension = Boolean.parseBoolean(properties.getProperty("replaceExtension", "false"));
    final boolean continueOnAnnotateError = Boolean.parseBoolean(properties.getProperty("continueOnAnnotateError", "false"));
    final boolean noClobber = Boolean.parseBoolean(properties.getProperty("noClobber", "false"));
    // final boolean randomize = Boolean.parseBoolean(properties.getProperty("randomize", "false"));
    final MutableInteger totalProcessed = new MutableInteger(0);
    final MutableInteger totalSkipped = new MutableInteger(0);
    final MutableInteger totalErrorAnnotating = new MutableInteger(0);
    //for each file...
    for (final File file : files) {
        // Determine if there is anything to be done....
        if (excludeFiles.contains(file.getName())) {
            logger.err("Skipping excluded file " + file.getName());
            totalSkipped.incValue(1);
            continue;
        }
        //--Get Output File Info
        //(filename)
        String outputDir = baseOutputDir;
        if (baseInputDir != null) {
            // Get input file name relative to base.
            // File.getParent() is null for a bare filename; treat that as "no sub-directory"
            // instead of failing with a NullPointerException.
            String parentDir = file.getParent();
            String relDir = (parentDir == null) ? "" : parentDir.replaceFirst(Pattern.quote(baseInputDir), "");
            outputDir = outputDir + File.separator + relDir;
        }
        // Make sure output directory exists
        new File(outputDir).mkdirs();
        String outputFilename = new File(outputDir, file.getName()).getPath();
        if (replaceExtension) {
            int lastDot = outputFilename.lastIndexOf('.');
            // for paths like "./zzz", lastDot will be 0
            if (lastDot > 0) {
                outputFilename = outputFilename.substring(0, lastDot);
            }
        }
        // ensure we don't make filenames with doubled extensions like .xml.xml
        if (!outputFilename.endsWith(extension)) {
            outputFilename += extension;
        }
        // normalize filename for the upcoming comparison
        outputFilename = new File(outputFilename).getCanonicalPath();
        //      Java 7 will have a Files.isSymbolicLink(file) method
        if (outputFilename.equals(file.getCanonicalPath())) {
            logger.err("Skipping " + file.getName() + ": output file " + outputFilename + " has the same filename as the input file -- assuming you don't actually want to do this.");
            totalSkipped.incValue(1);
            continue;
        }
        if (noClobber && new File(outputFilename).exists()) {
            logger.err("Skipping " + file.getName() + ": output file " + outputFilename + " as it already exists.  Don't use the noClobber option to override this.");
            totalSkipped.incValue(1);
            continue;
        }
        final String finalOutputFilename = outputFilename;
        //catching exceptions...
        try {
            // Check whether this file should be skipped again
            if (noClobber && new File(finalOutputFilename).exists()) {
                logger.err("Skipping " + file.getName() + ": output file " + finalOutputFilename + " as it already exists.  Don't use the noClobber option to override this.");
                synchronized (totalSkipped) {
                    totalSkipped.incValue(1);
                }
                // Skip only this file. A bare "return" here would abandon every
                // remaining file, because this code runs inline in the loop.
                continue;
            }
            logger.info("Processing file " + file.getAbsolutePath() + " ... writing to " + finalOutputFilename);
            //--Process File
            Annotation annotation = null;
            if (file.getAbsolutePath().endsWith(".ser.gz")) {
                // maybe they want to continue processing a partially processed annotation
                try {
                    // Create serializers
                    if (inputSerializerClass != null) {
                        AnnotationSerializer inputSerializer = loadSerializer(inputSerializerClass, inputSerializerName, properties);
                        InputStream is = new BufferedInputStream(new FileInputStream(file));
                        try {
                            Pair<Annotation, InputStream> pair = inputSerializer.read(is);
                            pair.second.close();
                            annotation = pair.first;
                        } finally {
                            // Release the file handle even when deserialization fails.
                            IOUtils.closeIgnoringExceptions(is);
                        }
                    } else {
                        annotation = IOUtils.readObjectFromFile(file);
                    }
                } catch (IOException e) {
                // guess that's not what they wanted
                // We hide IOExceptions because ones such as file not
                // found will be thrown again in a moment.  Note that
                // we are intentionally letting class cast exceptions
                // and class not found exceptions go through.
                } catch (ClassNotFoundException e) {
                    throw new RuntimeException(e);
                }
            }
            //(read file)
            if (annotation == null) {
                String encoding = properties.getProperty("encoding", "UTF-8");
                String text = IOUtils.slurpFile(file.getAbsoluteFile(), encoding);
                annotation = new Annotation(text);
            }
            Timing timing = new Timing();
            annotate.accept(annotation, finishedAnnotation -> {
                timing.done(logger, "Annotating file " + file.getAbsoluteFile());
                Throwable ex = finishedAnnotation.get(CoreAnnotations.ExceptionAnnotation.class);
                if (ex == null) {
                    // try-with-resources closes the output file even if print fails.
                    try (OutputStream fos = new BufferedOutputStream(new FileOutputStream(finalOutputFilename))) {
                        print.accept(finishedAnnotation, fos);
                    } catch (IOException e) {
                        throw new RuntimeIOException(e);
                    }
                    synchronized (totalProcessed) {
                        totalProcessed.incValue(1);
                        if (totalProcessed.intValue() % 1000 == 0) {
                            logger.info("Processed " + totalProcessed + " documents");
                        }
                    }
                } else if (continueOnAnnotateError) {
                    logger.err("Error annotating " + file.getAbsoluteFile() + ": " + ex);
                    synchronized (totalErrorAnnotating) {
                        totalErrorAnnotating.incValue(1);
                    }
                } else {
                    throw new RuntimeException("Error annotating " + file.getAbsoluteFile(), ex);
                }
            });
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }
/*
    if (randomize) {
      log("Randomly shuffling input");
      Collections.shuffle(toRun);
    }
    log("Ready to process: " + toRun.size() + " files, skipped " + totalSkipped + ", total " + nFiles);
    //--Run Jobs
    if(numThreads == 1){
      for(Runnable r : toRun){ r.run(); }
    } else {
      Redwood.Util.threadAndRun("StanfordCoreNLP <" + numThreads + " threads>", toRun, numThreads);
    }
    log("Processed " + totalProcessed + " documents");
    log("Skipped " + totalSkipped + " documents, error annotating " + totalErrorAnnotating + " documents");
    */
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) TreePrint(edu.stanford.nlp.trees.TreePrint) CoreAnnotation(edu.stanford.nlp.ling.CoreAnnotation) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Example 63 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class StanfordCoreNLPServer method main.

/**
   * Command-line entry point for the server.
   * Parses the arguments, builds a {@code StanfordCoreNLPServer}, optionally
   * pre-loads the configured annotators, and then runs the server.
   *
   * @param args The command line arguments
   *
   * @throws IOException Thrown if we could not start / run the server.
   */
public static void main(String[] args) throws IOException {
    // Startup banner
    log("--- " + StanfordCoreNLPServer.class.getSimpleName() + "#main() called ---");
    final String buildTag = System.getenv("BUILD");
    if (buildTag != null) {
        log("    Build: " + buildTag);
    }
    Runtime.getRuntime().addShutdownHook(new Thread(() -> log("CoreNLP Server is shutting down.")));

    // Static options first, then the instance options; the server must be
    // constructed only after the global options have been filled.
    ArgumentParser.fillOptions(StanfordCoreNLPServer.class, args);
    // Server properties come from the command line
    final Properties serverProperties = StringUtils.argsToProperties(args);
    final StanfordCoreNLPServer server = new StanfordCoreNLPServer(serverProperties);
    ArgumentParser.fillOptions(server, args);
    log("    Threads: " + ArgumentParser.threads);

    // Liveness server
    final AtomicBoolean live = new AtomicBoolean(false);
    server.livenessServer(live);

    // Homepage handler; a failure to create it is fatal
    final FileHandler homepage;
    try {
        homepage = new FileHandler("edu/stanford/nlp/pipeline/demo/corenlp-brat.html");
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }

    // Warm up the models when a non-blank annotator list was configured
    final String preload = StanfordCoreNLPServer.preloadedAnnotators;
    if (preload != null && !preload.trim().isEmpty()) {
        final Properties warmupProps = new Properties();
        server.defaultProps.entrySet().forEach(kv -> warmupProps.setProperty(kv.getKey().toString(), kv.getValue().toString()));
        warmupProps.setProperty("annotators", preload);
        try {
            new StanfordCoreNLP(warmupProps);
        } catch (Throwable t) {
            // Pre-loading is best effort: report the problem and start the server anyway.
            err("Could not pre-load annotators in server; encountered exception:");
            t.printStackTrace();
        }
    }

    // Credentials are present only when both username and password are set
    final Optional<Pair<String, String>> credentials = (server.username != null && server.password != null)
            ? Optional.of(Pair.makePair(server.username, server.password))
            : Optional.empty();

    // Launch; the only difference between the two modes is the ssl flag
    log("Starting server...");
    final boolean useSsl = server.ssl;
    server.run(credentials, req -> true, res -> {
    }, homepage, useSsl, live);
}
Also used : AtomicBoolean(java.util.concurrent.atomic.AtomicBoolean) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 64 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class Document method xmlMinified.

/**
   * Like the {@link Document#xml(Function...)} function, but emits minified XML
   * (pretty-printing disabled), which is more suitable for sending over the wire.
   *
   * @param functions The (possibly empty) list of annotations to populate on the document before dumping it
   *                  to XML. Each one is applied to the first sentence of this document.
   * @return The XML String for this document, without unnecessary whitespace.
   * @throws RuntimeIOException If the XML outputter fails with an {@link IOException}.
   */
@SafeVarargs
public final String xmlMinified(Function<Sentence, Object>... functions) {
    // Apply each requested annotation before serializing.
    for (Function<Sentence, Object> annotator : functions) {
        annotator.apply(this.sentence(0));
    }
    try {
        AnnotationOutputter.Options compactOptions = new AnnotationOutputter.Options();
        compactOptions.pretty = false;
        XMLOutputter outputter = new XMLOutputter();
        String xml = outputter.print(this.asAnnotation(false), compactOptions);
        return xml;
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) IOException(java.io.IOException)

Example 65 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class Dictionaries method loadCountriesLists.

/**
 * Reads a tab-separated file and adds the lower-cased second column of every
 * line to {@code countries}.
 *
 * @param file Location of the list, resolved by {@code IOUtils.readerFromString}.
 * @throws RuntimeIOException if the file cannot be opened or read.
 */
private void loadCountriesLists(String file) {
    // try-with-resources closes the reader even if reading or parsing a line fails.
    try (BufferedReader reader = IOUtils.readerFromString(file)) {
        for (String line; (line = reader.readLine()) != null; ) {
            // Every line is expected to have at least two tab-separated fields.
            countries.add(line.split("\t")[1].toLowerCase());
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)114 IOException (java.io.IOException)61 BufferedReader (java.io.BufferedReader)22 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)12 CoreLabel (edu.stanford.nlp.ling.CoreLabel)11 File (java.io.File)9 ArrayList (java.util.ArrayList)7 Tree (edu.stanford.nlp.trees.Tree)6 CoreMap (edu.stanford.nlp.util.CoreMap)5 BufferedWriter (java.io.BufferedWriter)5 Properties (java.util.Properties)5 Timing (edu.stanford.nlp.util.Timing)4 FileNotFoundException (java.io.FileNotFoundException)4 FileOutputStream (java.io.FileOutputStream)4 ObjectOutputStream (java.io.ObjectOutputStream)4 PrintWriter (java.io.PrintWriter)4 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)3 Annotation (edu.stanford.nlp.pipeline.Annotation)3 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)3 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)3