Search in sources :

Example 31 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

This example is from the class CacheParseHypotheses, method convertToTrees.

/**
 * Deserializes a gzip-compressed byte array back into a list of parse trees.
 * The expected wire format (written by the matching serialization routine) is:
 * an Integer count, followed by that many String tree representations.
 *
 * @param input gzip-compressed serialized tree data
 * @return the deserialized trees, with spans set on each tree
 * @throws RuntimeIOException if the bytes cannot be read or un-gzipped
 * @throws RuntimeException if a serialized class is missing at runtime
 */
public static List<Tree> convertToTrees(byte[] input) {
    // try-with-resources ensures all three streams are closed even if
    // readObject() or Tree.valueOf() throws partway through
    try (ByteArrayInputStream bis = new ByteArrayInputStream(input);
         GZIPInputStream gis = new GZIPInputStream(bis);
         ObjectInputStream ois = new ObjectInputStream(gis)) {
        int size = ErasureUtils.<Integer>uncheckedCast(ois.readObject());
        // presize: we know exactly how many trees are coming
        List<Tree> output = new ArrayList<>(size);
        for (int i = 0; i < size; ++i) {
            String rawTree = ErasureUtils.uncheckedCast(ois.readObject());
            Tree tree = Tree.valueOf(rawTree, trf);
            tree.setSpans();
            output.add(tree);
        }
        return output;
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    } catch (ClassNotFoundException e) {
        throw new RuntimeException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) ArrayList(java.util.ArrayList) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) GZIPInputStream(java.util.zip.GZIPInputStream) Tree(edu.stanford.nlp.trees.Tree)

Example 32 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

This example is from the class CustomAnnotationSerializer, method loadToken.

/**
 * Parses one tab-separated serialized token line into a CoreLabel.
 * Expected column order: word, lemma, POS, NE, normalized NE,
 * char offset begin, char offset end, and optionally an antecedent.
 */
private static CoreLabel loadToken(String line, boolean haveExplicitAntecedent) {
    CoreLabel token = new CoreLabel();
    // -1 limit keeps trailing empty columns
    String[] fields = line.split("\t", -1);
    if (fields.length < 7) {
        throw new RuntimeIOException("ERROR: Invalid format token for serialized token (only " + fields.length + " tokens): " + line);
    }
    // column 0: the word itself (spaces were escaped with SPACE_HOLDER)
    String word = fields[0].replaceAll(SPACE_HOLDER, " ");
    token.set(CoreAnnotations.TextAnnotation.class, word);
    token.set(CoreAnnotations.ValueAnnotation.class, word);
    // column 1: lemma (also set when the word itself is empty)
    if (fields[1].length() > 0 || fields[0].length() == 0) {
        token.set(CoreAnnotations.LemmaAnnotation.class, fields[1].replaceAll(SPACE_HOLDER, " "));
    }
    // column 2: part-of-speech tag
    if (fields[2].length() > 0) {
        token.set(CoreAnnotations.PartOfSpeechAnnotation.class, fields[2]);
    }
    // column 3: named-entity tag
    if (fields[3].length() > 0) {
        token.set(CoreAnnotations.NamedEntityTagAnnotation.class, fields[3]);
    }
    // column 4: normalized named-entity tag
    if (fields[4].length() > 0) {
        token.set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, fields[4]);
    }
    // columns 5-6: character offsets
    if (fields[5].length() > 0) {
        token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, Integer.parseInt(fields[5]));
    }
    if (fields[6].length() > 0) {
        token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, Integer.parseInt(fields[6]));
    }
    // column 7 (optional): antecedent annotation
    if (haveExplicitAntecedent && fields.length > 7) {
        String antecedent = fields[7].replaceAll(SPACE_HOLDER, " ");
        if (antecedent.length() > 0) {
            token.set(CoreAnnotations.AntecedentAnnotation.class, antecedent);
        }
    }
    return token;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) CoreLabel(edu.stanford.nlp.ling.CoreLabel) TreeCoreAnnotations(edu.stanford.nlp.trees.TreeCoreAnnotations) CoreAnnotations(edu.stanford.nlp.ling.CoreAnnotations) SemanticGraphCoreAnnotations(edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations) CorefCoreAnnotations(edu.stanford.nlp.coref.CorefCoreAnnotations)

Example 33 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

This example is from the class ColumnDataClassifier, method loadWordVectors.

/**
 * Reads a word-vector file (one word per line followed by its
 * whitespace-separated float components) into a map from word to vector.
 * Logs once if later lines disagree with the first line's dimensionality.
 */
private static Map<String, float[]> loadWordVectors(String filename) {
    Timing timing = new Timing();
    // presumably they'll load a fair-sized vocab!?
    Map<String, float[]> map = new HashMap<>(10000);
    try (BufferedReader br = IOUtils.readerFromString(filename)) {
        int numDimensions = -1;
        boolean warned = false;
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = line.split("\\s+");
            int dims = fields.length - 1;
            if (numDimensions < 0) {
                // first line fixes the expected vector size
                numDimensions = dims;
            } else if (numDimensions != dims && !warned) {
                // warn only once, then keep going with per-line sizes
                logger.info("loadWordVectors: Inconsistent vector size: " + numDimensions + " vs. " + dims);
                warned = true;
            }
            float[] vector = new float[dims];
            for (int j = 0; j < dims; j++) {
                vector[j] = Float.parseFloat(fields[j + 1]);
            }
            map.put(fields[0], vector);
        }
    } catch (IOException ioe) {
        throw new RuntimeIOException("Couldn't load word vectors", ioe);
    }
    timing.done("Loading word vectors from " + filename + " ... ");
    return map;
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) ConcurrentHashMap(java.util.concurrent.ConcurrentHashMap) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 34 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

This example is from the class LinearClassifierFactory, method loadFromFilename.

/**
 * Given the path to a file representing the text based serialization of a
 * Linear Classifier, reconstitutes and returns that LinearClassifier.
 *
 * File layout: label index, feature index, then one weight triple
 * (feature, label, value) per line, a blank line, the threshold count,
 * and finally one threshold per line.
 *
 * TODO: Leverage Index
 *
 * @param file path to the text-serialized classifier
 * @return the reconstituted classifier
 * @throws RuntimeIOException if the file cannot be read or is malformed
 */
public static LinearClassifier<String, String> loadFromFilename(String file) {
    // try-with-resources: the original leaked the reader when any parse
    // error was thrown before the explicit close()
    try (BufferedReader in = IOUtils.readerFromString(file)) {
        // Format: read indices first, weights, then thresholds
        Index<String> labelIndex = HashIndex.loadFromReader(in);
        Index<String> featureIndex = HashIndex.loadFromReader(in);
        double[][] weights = new double[featureIndex.size()][labelIndex.size()];
        int currLine = 1;
        String line = in.readLine();
        // weights section ends at the first empty line
        while (line != null && line.length() > 0) {
            String[] tuples = line.split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER);
            if (tuples.length != 3) {
                // IOException (not raw Exception) — still caught below and
                // wrapped in the same RuntimeIOException callers expect
                throw new IOException("Error: incorrect number of tokens in weight specifier, line=" + currLine + " in file " + file);
            }
            currLine++;
            int feature = Integer.parseInt(tuples[0]);
            int label = Integer.parseInt(tuples[1]);
            double value = Double.parseDouble(tuples[2]);
            weights[feature][label] = value;
            line = in.readLine();
        }
        // First line in thresholds is the number of thresholds
        int numThresholds = Integer.parseInt(in.readLine());
        double[] thresholds = new double[numThresholds];
        int curr = 0;
        while ((line = in.readLine()) != null) {
            thresholds[curr++] = Double.parseDouble(line.trim());
        }
        return new LinearClassifier<>(weights, featureIndex, labelIndex);
    } catch (Exception e) {
        // broad catch preserved: number-format and index errors were also
        // wrapped as RuntimeIOException in the original
        throw new RuntimeIOException("Error in LinearClassifierFactory, loading from file=" + file, e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) BufferedReader(java.io.BufferedReader) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Example 35 with RuntimeIOException

use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.

This example is from the class CRFClassifier, method serializeClassifier.

/**
 * Serialize the classifier to the given ObjectOutputStream.
 * <br>
 * (Since the classifier is a processor, we don't want to serialize the
 * whole classifier but just the data that represents a classifier model.)
 * <br>
 * NOTE(review): the write order below IS the on-disk format — the matching
 * deserializer must read fields in exactly this order, so do not reorder
 * or retype any write without updating it in lockstep.
 *
 * @param oos the stream to write the model data to
 * @throws RuntimeIOException if any write fails
 */
@Override
public void serializeClassifier(ObjectOutputStream oos) {
    try {
        // model indices and configuration flags
        oos.writeObject(labelIndices);
        oos.writeObject(classIndex);
        oos.writeObject(featureIndex);
        oos.writeObject(flags);
        // embeddings are written conditionally — the reader presumably checks
        // the same flag to decide whether to read them
        if (flags.useEmbedding) {
            oos.writeObject(embeddings);
        }
        // For some reason, writing out the array of FeatureFactory
        // objects doesn't seem to work.  The resulting classifier
        // doesn't have the lexicon (distsim object) correctly saved.  So now custom write the list
        // (the size is written as a boxed Integer via writeObject; switching
        // to writeInt would break compatibility with existing model files)
        oos.writeObject(featureFactories.size());
        for (FeatureFactory<IN> ff : featureFactories) {
            oos.writeObject(ff);
        }
        oos.writeInt(windowSize);
        oos.writeObject(weights);
        // oos.writeObject(WordShapeClassifier.getKnownLowerCaseWords());
        oos.writeObject(knownLCWords);
        // optional trailing field: only present when a label dictionary exists
        if (labelDictionary != null) {
            oos.writeObject(labelDictionary);
        }
    } catch (IOException e) {
        throw new RuntimeIOException(e);
    }
}
Also used : RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException) RuntimeIOException(edu.stanford.nlp.io.RuntimeIOException)

Aggregations

RuntimeIOException (edu.stanford.nlp.io.RuntimeIOException)114 IOException (java.io.IOException)61 BufferedReader (java.io.BufferedReader)22 CoreAnnotations (edu.stanford.nlp.ling.CoreAnnotations)12 CoreLabel (edu.stanford.nlp.ling.CoreLabel)11 File (java.io.File)9 ArrayList (java.util.ArrayList)7 Tree (edu.stanford.nlp.trees.Tree)6 CoreMap (edu.stanford.nlp.util.CoreMap)5 BufferedWriter (java.io.BufferedWriter)5 Properties (java.util.Properties)5 Timing (edu.stanford.nlp.util.Timing)4 FileNotFoundException (java.io.FileNotFoundException)4 FileOutputStream (java.io.FileOutputStream)4 ObjectOutputStream (java.io.ObjectOutputStream)4 PrintWriter (java.io.PrintWriter)4 CorefCoreAnnotations (edu.stanford.nlp.coref.CorefCoreAnnotations)3 Annotation (edu.stanford.nlp.pipeline.Annotation)3 SemanticGraphCoreAnnotations (edu.stanford.nlp.semgraph.SemanticGraphCoreAnnotations)3 ClassicCounter (edu.stanford.nlp.stats.ClassicCounter)3