Search in sources :

Example 81 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class CoNLLDocumentReaderAndWriter method makeCoreLabel.

/**
 * Builds a token from one line of a CoNLL file. Different languages use
 *  between 2 and 5 whitespace-separated columns on non-blank lines:
 *  <ul>
 *  <li>2 columns: word, answer</li>
 *  <li>3 columns: word, tag, answer</li>
 *  <li>4 columns: word, tag, chunk, answer</li>
 *  <li>5 columns: word, lemma, tag, chunk, answer</li>
 *  </ul>
 *  A line with fewer than 2 columns yields a {@code BOUNDARY} token labeled
 *  with the background symbol.
 *
 *  @param line A line of CoNLL input
 *  @return The constructed token
 *  @throws RuntimeIOException If the line has more than 5 columns
 */
private CoreLabel makeCoreLabel(String line) {
    final CoreLabel token = new CoreLabel();
    final String[] columns = line.split("\\s+");
    final int numColumns = columns.length;

    if (numColumns > 5) {
        throw new RuntimeIOException("Unexpected input (many fields): " + line);
    }

    if (numColumns < 2) {
        // Blank (or single-column) line: treat it as a boundary marker.
        token.setWord(BOUNDARY);
        token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
    } else {
        // Only the 5-column layout carries a lemma (in column 1), which shifts the tag column by one.
        final boolean hasLemma = (numColumns == 5);
        // The word is set before the lemma, keeping the same order of annotation writes as always.
        token.setWord(hasLemma && flags.useLemmaAsWord ? columns[1] : columns[0]);
        if (hasLemma) {
            token.set(CoreAnnotations.LemmaAnnotation.class, columns[1]);
        }
        if (numColumns >= 3) {
            token.setTag(columns[hasLemma ? 2 : 1]);
        }
        if (numColumns >= 4) {
            // The chunk column, when present, is always second to last.
            token.set(CoreAnnotations.ChunkAnnotation.class, columns[numColumns - 2]);
        }
        // The answer is always the last column.
        token.set(CoreAnnotations.AnswerAnnotation.class, columns[numColumns - 1]);
    }

    // The value annotation is used in many places in CoreNLP, so set it to the word itself.
    token.set(CoreAnnotations.ValueAnnotation.class, token.word());
    // Copy the answer to the gold answer here, before any recoding happens, so the original coding
    // is preserved. This matters when the recoding is lossy (like IOB2 to IO), since it still
    // allows correct evaluation later.
    final String originalAnswer = token.get(CoreAnnotations.AnswerAnnotation.class);
    token.set(CoreAnnotations.GoldAnswerAnnotation.class, originalAnswer);
    return token;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations)

Example 82 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class SentimentModel method loadSerialized.

/**
 * Reads a serialized {@code SentimentModel} from the given location and logs
 * how long the load took.
 *
 * @param path Location of the serialized model, passed to
 *     {@code IOUtils.readObjectFromURLOrClasspathOrFileSystem}
 * @return The deserialized model
 * @throws RuntimeIOException Wrapping any {@code IOException} or
 *     {@code ClassNotFoundException} raised while reading
 */
public static SentimentModel loadSerialized(String path) {
    final Timing loadTimer = new Timing();
    final SentimentModel loaded;
    try {
        loaded = IOUtils.readObjectFromURLOrClasspathOrFileSystem(path);
    } catch (IOException | ClassNotFoundException e) {
        // Callers are not expected to handle checked exceptions here.
        throw new RuntimeIOException(e);
    }
    loadTimer.done(log, "Loading sentiment model " + path);
    return loaded;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) Timing(edu.stanford.nlp.util.Timing) IOException(java.io.IOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 83 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class AffixDictionary method readDict.

/**
 * Reads a dictionary file into a set, one entry per line of the file.
 * The file is read as UTF-8.
 *
 * @param filename Location of the dictionary, passed to {@code IOUtils.readerFromString}
 * @return The set of lines read from the file
 * @throws RuntimeIOException Wrapping any {@code IOException} raised while opening,
 *     reading or closing the file
 */
private static Set<String> readDict(String filename) {
    Set<String> a = Generics.newHashSet();
    // try-with-resources guarantees the reader is closed even when readLine() fails part-way.
    try (BufferedReader aDetectorReader = IOUtils.readerFromString(filename, "UTF-8")) {
        for (String aDetectorLine; (aDetectorLine = aDetectorReader.readLine()) != null; ) {
            a.add(aDetectorLine);
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
    logger.info("Loading affix dictionary from " + filename + " [done].");
    return a;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 84 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class ChineseDictionary method serializeDictionary.

/**
 * Writes the {@code words_} dictionary to the given path using Java serialization.
 *
 * @param serializePath Destination, passed to {@code IOUtils.writeStreamFromString}
 * @throws RuntimeIOException Wrapping any exception raised while opening, writing
 *     or closing the stream (the failure is also logged)
 */
private void serializeDictionary(String serializePath) {
    logger.info("Serializing dictionaries to " + serializePath + " ... ");
    // try-with-resources guarantees the stream is closed even when writeObject() fails.
    try (ObjectOutputStream oos = IOUtils.writeStreamFromString(serializePath)) {
        oos.writeObject(words_);
    } catch (Exception e) {
        logger.error("Failed", e);
        throw new RuntimeIOException(e);
    }
    // Only reached when both the write and the close succeeded.
    logger.info("done.");
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 85 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

the class KBPStatisticalExtractor method main.

/**
 * Command-line entry point: evaluates a relation-extraction model on the test data,
 * training and saving a new model first when none exists at {@code MODEL_FILE}.
 *
 * @param args Command-line arguments, used to fill the options of this class
 * @throws IOException If reading the datasets or reading/writing the model fails
 * @throws ClassNotFoundException If the serialized model cannot be deserialized
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
    // Disable SLF4J crap.
    RedwoodConfiguration.standard().apply();
    // Fill command-line options
    ArgumentParser.fillOptions(KBPStatisticalExtractor.class, args);
    // Load the test (or dev) data
    forceTrack("Test data");
    List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);
    log.info("Read " + testExamples.size() + " examples");
    endTrack("Test data");
    // If we can't find an existing model, train one
    if (!IOUtils.existsInClasspathOrFileSystem(MODEL_FILE)) {
        forceTrack("Training data");
        List<Pair<KBPInput, String>> trainExamples = KBPRelationExtractor.readDataset(TRAIN_FILE);
        log.info("Read " + trainExamples.size() + " examples");
        log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
        endTrack("Training data");
        // Featurize + create the dataset
        forceTrack("Creating dataset");
        RVFDataset<String, String> dataset = new RVFDataset<>();
        final AtomicInteger i = new AtomicInteger(0);
        long beginTime = System.currentTimeMillis();
        trainExamples.stream().parallel().forEach(example -> {
            // Capture the value returned by the increment: re-reading the counter afterwards could
            // observe increments made by other worker threads and log a misleading count.
            final int featurized = i.incrementAndGet();
            if (featurized % 1000 == 0) {
                log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) + "] Featurized " + featurized + " / " + trainExamples.size() + " examples");
            }
            // This takes a while per example
            Counter<String> features = features(example.first);
            // Featurization runs in parallel; additions to the shared dataset are serialized.
            synchronized (dataset) {
                dataset.add(new RVFDatum<>(features, example.second));
            }
        });
        // Free up some memory
        trainExamples.clear();
        endTrack("Creating dataset");
        // Train the classifier
        log.info("Training classifier:");
        Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
        // Free up some memory
        dataset.clear();
        // Save the classifier
        IOUtils.writeObjectToFile(new KBPStatisticalExtractor(classifier), MODEL_FILE);
    }
    // Read either a newly-trained or pre-trained model
    Object model = IOUtils.readObjectFromURLOrClasspathOrFileSystem(MODEL_FILE);
    KBPStatisticalExtractor classifier;
    if (model instanceof Classifier) {
        // The file holds a bare classifier; wrap it in an extractor.
        // noinspection unchecked
        classifier = new KBPStatisticalExtractor((Classifier<String, String>) model);
    } else {
        classifier = ((KBPStatisticalExtractor) model);
    }
    // Evaluate the model; predictions go to standard output when the option is "stdout"
    // (case-insensitive), otherwise to the file named by the option.
    classifier.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
        try {
            return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
        } catch (IOException e) {
            throw new RuntimeIOException(e);
        }
    }));
}
Also used : edu.stanford.nlp.optimization(edu.stanford.nlp.optimization) CoreLabel(edu.stanford.nlp.ling.CoreLabel) java.util(java.util) Counters(edu.stanford.nlp.stats.Counters) IOUtils(edu.stanford.nlp.io.IOUtils) DefaultPaths(edu.stanford.nlp.pipeline.DefaultPaths) edu.stanford.nlp.util(edu.stanford.nlp.util) Redwood(edu.stanford.nlp.util.logging.Redwood) Util(edu.stanford.nlp.util.logging.Redwood.Util) Datum(edu.stanford.nlp.ling.Datum) Function(java.util.function.Function) Collectors(java.util.stream.Collectors) Span(edu.stanford.nlp.ie.machinereading.structure.Span) Counter(edu.stanford.nlp.stats.Counter) java.io(java.io) AtomicInteger(java.util.concurrent.atomic.AtomicInteger) edu.stanford.nlp.classify(edu.stanford.nlp.classify) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) Sentence(edu.stanford.nlp.simple.Sentence) RedwoodConfiguration(edu.stanford.nlp.util.logging.RedwoodConfiguration) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter) RVFDatum(edu.stanford.nlp.ling.RVFDatum) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) AtomicInteger(java.util.concurrent.atomic.AtomicInteger)

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException): 114; IOException (java.io.IOException): 61; BufferedReader (java.io.BufferedReader): 22; CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations): 12; CoreLabel (edu.stanford.nlp.ling.CoreLabel): 11; File (java.io.File): 9; ArrayList (java.util.ArrayList): 7; Tree (edu.stanford.nlp.trees.Tree): 6; CoreMap (edu.stanford.nlp.util.CoreMap): 5; BufferedWriter (java.io.BufferedWriter): 5; Properties (java.util.Properties): 5; Timing (edu.stanford.nlp.util.Timing): 4; FileNotFoundException (java.io.FileNotFoundException): 4; FileOutputStream (java.io.FileOutputStream): 4; ObjectOutputStream (java.io.ObjectOutputStream): 4; PrintWriter (java.io.PrintWriter): 4; CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations): 3; Annotation (edu.stanford.nlp.pipeline.Annotation): 3; SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations): 3; ClassicCounter (edu.stanford.nlp.stats.ClassicCounter): 3