Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class CacheParseHypotheses, method convertToTrees.
public static List<Tree> convertToTrees(byte[] input) {
  try {
    List<Tree> output = new ArrayList<>();
    ByteArrayInputStream bis = new ByteArrayInputStream(input);
    GZIPInputStream gis = new GZIPInputStream(bis);
    ObjectInputStream ois = new ObjectInputStream(gis);
    // The stream begins with the number of trees that follow
    int size = ErasureUtils.<Integer>uncheckedCast(ois.readObject());
    for (int i = 0; i < size; ++i) {
      // Each tree is stored as a Penn-Treebank-style string and
      // re-read with the class's TreeReaderFactory (trf)
      String rawTree = ErasureUtils.uncheckedCast(ois.readObject());
      Tree tree = Tree.valueOf(rawTree, trf);
      tree.setSpans();
      output.add(tree);
    }
    ois.close();
    gis.close();
    bis.close();
    return output;
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
}
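For reference, a minimal round-trip sketch: it writes a single tree in the layout convertToTrees expects (a gzipped object stream holding a count followed by one Penn-format string per tree) and reads it back. The dvparser package path in the import is an assumption; adjust it to wherever CacheParseHypotheses lives in your CoreNLP version.

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.util.List;
import java.util.zip.GZIPOutputStream;
import edu.stanford.nlp.parser.dvparser.CacheParseHypotheses;  // assumed package
import edu.stanford.nlp.trees.Tree;

public class ConvertToTreesDemo {
  public static void main(String[] args) throws IOException {
    ByteArrayOutputStream bos = new ByteArrayOutputStream();
    try (ObjectOutputStream oos = new ObjectOutputStream(new GZIPOutputStream(bos))) {
      oos.writeObject(Integer.valueOf(1));              // tree count, read first
      oos.writeObject("(ROOT (NP (DT a) (NN test)))");  // one string per tree
    }
    List<Tree> trees = CacheParseHypotheses.convertToTrees(bos.toByteArray());
    System.out.println(trees.get(0));
  }
}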
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class CustomAnnotationSerializer, method loadToken.
private static CoreLabel loadToken(String line, boolean haveExplicitAntecedent) {
  CoreLabel token = new CoreLabel();
  String[] bits = line.split("\t", -1);
  if (bits.length < 7)
    throw new RuntimeIOException("ERROR: Invalid format token for serialized token (only " + bits.length + " tokens): " + line);
  // word
  String word = bits[0].replaceAll(SPACE_HOLDER, " ");
  token.set(CoreAnnotations.TextAnnotation.class, word);
  token.set(CoreAnnotations.ValueAnnotation.class, word);
  // lemma
  if (bits[1].length() > 0 || bits[0].length() == 0) {
    String lemma = bits[1].replaceAll(SPACE_HOLDER, " ");
    token.set(CoreAnnotations.LemmaAnnotation.class, lemma);
  }
  // POS tag
  if (bits[2].length() > 0)
    token.set(CoreAnnotations.PartOfSpeechAnnotation.class, bits[2]);
  // NE tag
  if (bits[3].length() > 0)
    token.set(CoreAnnotations.NamedEntityTagAnnotation.class, bits[3]);
  // Normalized NE tag
  if (bits[4].length() > 0)
    token.set(CoreAnnotations.NormalizedNamedEntityTagAnnotation.class, bits[4]);
  // Character offsets
  if (bits[5].length() > 0)
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, Integer.parseInt(bits[5]));
  if (bits[6].length() > 0)
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, Integer.parseInt(bits[6]));
  if (haveExplicitAntecedent) {
    // We may have an AntecedentAnnotation
    if (bits.length > 7) {
      String aa = bits[7].replaceAll(SPACE_HOLDER, " ");
      if (aa.length() > 0)
        token.set(CoreAnnotations.AntecedentAnnotation.class, aa);
    }
  }
  return token;
}
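To make the column layout concrete, here is a small self-contained sketch that builds one token line in the seven-field format loadToken parses (word, lemma, POS tag, NE tag, normalized NE tag, begin offset, end offset) and applies the same annotations. The sample values are hypothetical, and the SPACE_HOLDER unescaping is skipped since loadToken itself is private.

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;

public class TokenLineDemo {
  public static void main(String[] args) {
    // Hypothetical line: word, lemma, POS, NE, normalized NE, begin, end
    String line = "Stanford\tStanford\tNNP\tORGANIZATION\t\t0\t8";
    String[] bits = line.split("\t", -1);
    CoreLabel token = new CoreLabel();
    token.set(CoreAnnotations.TextAnnotation.class, bits[0]);
    token.set(CoreAnnotations.ValueAnnotation.class, bits[0]);
    token.set(CoreAnnotations.LemmaAnnotation.class, bits[1]);
    token.set(CoreAnnotations.PartOfSpeechAnnotation.class, bits[2]);
    token.set(CoreAnnotations.NamedEntityTagAnnotation.class, bits[3]);
    token.set(CoreAnnotations.CharacterOffsetBeginAnnotation.class, Integer.parseInt(bits[5]));
    token.set(CoreAnnotations.CharacterOffsetEndAnnotation.class, Integer.parseInt(bits[6]));
    System.out.println(token.word() + "/" + token.tag() + " [" + token.ner() + "]");
  }
}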
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class ColumnDataClassifier, method loadWordVectors.
private static Map<String, float[]> loadWordVectors(String filename) {
  Timing timing = new Timing();
  // presumably they'll load a fair-sized vocab!?
  Map<String, float[]> map = new HashMap<>(10000);
  try (BufferedReader br = IOUtils.readerFromString(filename)) {
    int numDimensions = -1;
    boolean warned = false;
    for (String line; (line = br.readLine()) != null; ) {
      String[] fields = line.split("\\s+");
      if (numDimensions < 0) {
        numDimensions = fields.length - 1;
      } else {
        if (numDimensions != fields.length - 1 && !warned) {
          logger.info("loadWordVectors: Inconsistent vector size: " + numDimensions + " vs. " + (fields.length - 1));
          warned = true;
        }
      }
      float[] vector = new float[fields.length - 1];
      for (int i = 1; i < fields.length; i++) {
        vector[i - 1] = Float.parseFloat(fields[i]);
      }
      map.put(fields[0], vector);
    }
  } catch (IOException ioe) {
    throw new RuntimeIOException("Couldn't load word vectors", ioe);
  }
  timing.done("Loading word vectors from " + filename + " ... ");
  return map;
}
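As a sanity check on the file format, the following sketch mirrors the parsing loop on two in-memory lines: each line is a token followed by whitespace-separated float components, and every vector is keyed by its token. The sample vectors are made up for illustration.

import java.util.HashMap;
import java.util.Map;

public class WordVectorParseDemo {
  public static void main(String[] args) {
    // Two hypothetical 3-dimensional vectors in the expected format
    String[] lines = { "the 0.1 0.2 0.3", "cat 0.4 0.5 0.6" };
    Map<String, float[]> map = new HashMap<>();
    for (String line : lines) {
      String[] fields = line.split("\\s+");
      float[] vector = new float[fields.length - 1];
      for (int i = 1; i < fields.length; i++) {
        vector[i - 1] = Float.parseFloat(fields[i]);
      }
      map.put(fields[0], vector);
    }
    System.out.println(map.get("cat")[2]);  // prints 0.6
  }
}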
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class LinearClassifierFactory, method loadFromFilename.
/**
 * Given the path to a file representing the text-based serialization of a
 * Linear Classifier, reconstitutes and returns that LinearClassifier.
 *
 * TODO: Leverage Index
 */
public static LinearClassifier<String, String> loadFromFilename(String file) {
  try {
    BufferedReader in = IOUtils.readerFromString(file);
    // Format: read the indices first, then the weights, then the thresholds
    Index<String> labelIndex = HashIndex.loadFromReader(in);
    Index<String> featureIndex = HashIndex.loadFromReader(in);
    double[][] weights = new double[featureIndex.size()][labelIndex.size()];
    int currLine = 1;
    String line = in.readLine();
    while (line != null && line.length() > 0) {
      String[] tuples = line.split(LinearClassifier.TEXT_SERIALIZATION_DELIMITER);
      if (tuples.length != 3) {
        throw new Exception("Error: incorrect number of tokens in weight specifier, line=" + currLine + " in file " + file);
      }
      currLine++;
      int feature = Integer.parseInt(tuples[0]);
      int label = Integer.parseInt(tuples[1]);
      double value = Double.parseDouble(tuples[2]);
      weights[feature][label] = value;
      line = in.readLine();
    }
    // The first line of the thresholds block is the number of thresholds
    int numThresholds = Integer.parseInt(in.readLine());
    double[] thresholds = new double[numThresholds];
    int curr = 0;
    while ((line = in.readLine()) != null) {
      double tval = Double.parseDouble(line.trim());
      thresholds[curr++] = tval;
    }
    in.close();
    LinearClassifier<String, String> classifier = new LinearClassifier<>(weights, featureIndex, labelIndex);
    return classifier;
  } catch (Exception e) {
    throw new RuntimeIOException("Error in LinearClassifierFactory, loading from file=" + file, e);
  }
}
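A minimal caller sketch, assuming a classifier was previously written with the matching text serializer to a hypothetical path classifier.txt; the feature names passed to BasicDatum are placeholders.

import java.util.Arrays;
import edu.stanford.nlp.classify.LinearClassifier;
import edu.stanford.nlp.classify.LinearClassifierFactory;
import edu.stanford.nlp.ling.BasicDatum;

public class LoadClassifierDemo {
  public static void main(String[] args) {
    // "classifier.txt" is a hypothetical path to a text-serialized model
    LinearClassifier<String, String> classifier =
        LinearClassifierFactory.loadFromFilename("classifier.txt");
    // Classify a datum built from placeholder feature names
    String label = classifier.classOf(
        new BasicDatum<String, String>(Arrays.asList("feat1", "feat2")));
    System.out.println(label);
  }
}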
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class CRFClassifier, method serializeClassifier.
/**
* Serialize the classifier to the given ObjectOutputStream.
* <br>
* (Since the classifier is a processor, we don't want to serialize the
* whole classifier but just the data that represents a classifier model.)
*/
@Override
public void serializeClassifier(ObjectOutputStream oos) {
  try {
    oos.writeObject(labelIndices);
    oos.writeObject(classIndex);
    oos.writeObject(featureIndex);
    oos.writeObject(flags);
    if (flags.useEmbedding) {
      oos.writeObject(embeddings);
    }
    // For some reason, writing out the array of FeatureFactory objects
    // doesn't seem to work: the resulting classifier doesn't have the
    // lexicon (distsim object) correctly saved. So we write the list
    // out element by element instead.
    oos.writeObject(featureFactories.size());
    for (FeatureFactory<IN> ff : featureFactories) {
      oos.writeObject(ff);
    }
    oos.writeInt(windowSize);
    oos.writeObject(weights);
    // oos.writeObject(WordShapeClassifier.getKnownLowerCaseWords());
    oos.writeObject(knownLCWords);
    if (labelDictionary != null) {
      oos.writeObject(labelDictionary);
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
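A short caller sketch for the overload above, assuming a pretrained NER model is available on the classpath; both paths shown are illustrative, not prescribed by the source.

import java.io.FileOutputStream;
import java.io.IOException;
import java.io.ObjectOutputStream;
import edu.stanford.nlp.ie.crf.CRFClassifier;
import edu.stanford.nlp.ling.CoreLabel;

public class SerializeCrfDemo {
  public static void main(String[] args) throws IOException, ClassNotFoundException {
    // Illustrative model path; any serialized CRF model would do
    CRFClassifier<CoreLabel> crf = CRFClassifier.getClassifier(
        "edu/stanford/nlp/models/ner/english.all.3class.distsim.crf.ser.gz");
    // Write the model data back out through the ObjectOutputStream overload
    try (ObjectOutputStream oos =
             new ObjectOutputStream(new FileOutputStream("my-model.ser"))) {
      crf.serializeClassifier(oos);
    }
  }
}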