Usage of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project (stanfordnlp):
class CoNLLDocumentReaderAndWriter, method makeCoreLabel.
/**
 * Builds a single token from one line of CoNLL input. Files for different
 * languages carry between 2 and 5 whitespace-separated columns on
 * non-blank lines; the column count determines which annotations are set.
 *
 * @param line A line of CoNLL input
 * @return The constructed token
 */
private CoreLabel makeCoreLabel(String line) {
CoreLabel token = new CoreLabel();
String[] fields = line.split("\\s+");
int numFields = fields.length;
if (numFields <= 1) {
// Blank (or whitespace-only) line: emit a document/sentence boundary token.
token.setWord(BOUNDARY);
token.set(CoreAnnotations.AnswerAnnotation.class, flags.backgroundSymbol);
} else if (numFields == 2) {
// word answer
token.setWord(fields[0]);
token.set(CoreAnnotations.AnswerAnnotation.class, fields[1]);
} else if (numFields == 3) {
// word tag answer
token.setWord(fields[0]);
token.setTag(fields[1]);
token.set(CoreAnnotations.AnswerAnnotation.class, fields[2]);
} else if (numFields == 4) {
// word tag chunk answer
token.setWord(fields[0]);
token.setTag(fields[1]);
token.set(CoreAnnotations.ChunkAnnotation.class, fields[2]);
token.set(CoreAnnotations.AnswerAnnotation.class, fields[3]);
} else if (numFields == 5) {
// word lemma tag chunk answer; optionally substitute the lemma for the word
token.setWord(flags.useLemmaAsWord ? fields[1] : fields[0]);
token.set(CoreAnnotations.LemmaAnnotation.class, fields[1]);
token.setTag(fields[2]);
token.set(CoreAnnotations.ChunkAnnotation.class, fields[3]);
token.set(CoreAnnotations.AnswerAnnotation.class, fields[4]);
} else {
throw new RuntimeIOException("Unexpected input (many fields): " + line);
}
// Value annotation is used in a lot of place in corenlp so setting here as the word itself
token.set(CoreAnnotations.ValueAnnotation.class, token.word());
// The copy to GoldAnswerAnnotation is done before the recoding is done, and so it preserves the original coding.
// This is important if the original coding is true, but the recoding is defective (like IOB2 to IO), since
// it will allow correct evaluation later.
token.set(CoreAnnotations.GoldAnswerAnnotation.class, token.get(CoreAnnotations.AnswerAnnotation.class));
return token;
}
Usage of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project (stanfordnlp):
class SentimentModel, method loadSerialized.
/**
 * Loads a serialized {@link SentimentModel} from a URL, the classpath, or the
 * file system, logging how long the load took.
 *
 * @param path Location of the serialized model
 * @return The deserialized model
 * @throws RuntimeIOException if the model cannot be read or its class resolved
 */
public static SentimentModel loadSerialized(String path) {
try {
Timing loadTimer = new Timing();
SentimentModel deserialized = IOUtils.readObjectFromURLOrClasspathOrFileSystem(path);
loadTimer.done(log, "Loading sentiment model " + path);
return deserialized;
} catch (IOException | ClassNotFoundException cause) {
// Wrap checked exceptions so callers are not forced to handle them.
throw new RuntimeIOException(cause);
}
}
Usage of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project (stanfordnlp):
class AffixDictionary, method readDict.
/**
 * Reads an affix dictionary: one entry per line, UTF-8 encoded.
 *
 * @param filename Path (or classpath/URL location, per IOUtils) of the dictionary file
 * @return The set of lines read from the file
 * @throws RuntimeIOException if the file cannot be opened or read
 */
private static Set<String> readDict(String filename) {
Set<String> a = Generics.newHashSet();
// try-with-resources: the original closed the reader only on the success
// path, leaking the stream if readLine() threw mid-file.
try (BufferedReader aDetectorReader = IOUtils.readerFromString(filename, "UTF-8")) {
for (String aDetectorLine; (aDetectorLine = aDetectorReader.readLine()) != null; ) {
a.add(aDetectorLine);
}
} catch (IOException e) {
throw new RuntimeIOException(e);
}
logger.info("Loading affix dictionary from " + filename + " [done].");
return a;
}
Usage of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project (stanfordnlp):
class ChineseDictionary, method serializeDictionary.
/**
 * Serializes the dictionary word table to the given path.
 *
 * @param serializePath Destination for the serialized dictionary
 * @throws RuntimeIOException if opening the stream or writing fails
 */
private void serializeDictionary(String serializePath) {
logger.info("Serializing dictionaries to " + serializePath + " ... ");
// try-with-resources: the original closed the stream only on the success
// path, leaking it if writeObject() threw.
try (ObjectOutputStream oos = IOUtils.writeStreamFromString(serializePath)) {
oos.writeObject(words_);
} catch (Exception e) {
// Broad catch kept deliberately: runtime failures during serialization
// were also logged and wrapped by the original code.
logger.error("Failed", e);
throw new RuntimeIOException(e);
}
logger.info("done.");
}
Usage of edu.stanford.nlp.io.RuntimeIOException in the CoreNLP project (stanfordnlp):
class KBPStatisticalExtractor, method main.
/**
 * Entry point: evaluates a KBP relation-extraction classifier on the test
 * (or dev) dataset, training one first if no serialized model is found at
 * MODEL_FILE. Training featurizes TRAIN_FILE examples in parallel, fits a
 * multinomial classifier, and serializes it to MODEL_FILE before evaluation.
 *
 * @param args Command-line options, bound to fields of this class via ArgumentParser
 * @throws IOException if a dataset or model file cannot be read or written
 * @throws ClassNotFoundException if the serialized model's class cannot be resolved
 */
public static void main(String[] args) throws IOException, ClassNotFoundException {
// Disable SLF4J crap.
RedwoodConfiguration.standard().apply();
// Fill command-line options
ArgumentParser.fillOptions(KBPStatisticalExtractor.class, args);
// Load the test (or dev) data
forceTrack("Test data");
List<Pair<KBPInput, String>> testExamples = KBPRelationExtractor.readDataset(TEST_FILE);
log.info("Read " + testExamples.size() + " examples");
endTrack("Test data");
// If we can't find an existing model, train one
if (!IOUtils.existsInClasspathOrFileSystem(MODEL_FILE)) {
forceTrack("Training data");
List<Pair<KBPInput, String>> trainExamples = KBPRelationExtractor.readDataset(TRAIN_FILE);
log.info("Read " + trainExamples.size() + " examples");
log.info("" + trainExamples.stream().map(Pair::second).filter(NO_RELATION::equals).count() + " are " + NO_RELATION);
endTrack("Training data");
// Featurize + create the dataset
forceTrack("Creating dataset");
RVFDataset<String, String> dataset = new RVFDataset<>();
// Shared progress counter for the parallel featurization below.
final AtomicInteger i = new AtomicInteger(0);
long beginTime = System.currentTimeMillis();
trainExamples.stream().parallel().forEach(example -> {
if (i.incrementAndGet() % 1000 == 0) {
log.info("[" + Redwood.formatTimeDifference(System.currentTimeMillis() - beginTime) + "] Featurized " + i.get() + " / " + trainExamples.size() + " examples");
}
// This takes a while per example
Counter<String> features = features(example.first);
// RVFDataset is not thread-safe, so adds are serialized on the dataset.
synchronized (dataset) {
dataset.add(new RVFDatum<>(features, example.second));
}
});
// Free up some memory
trainExamples.clear();
endTrack("Creating dataset");
// Train the classifier
log.info("Training classifier:");
Classifier<String, String> classifier = trainMultinomialClassifier(dataset, FEATURE_THRESHOLD, SIGMA);
// Free up some memory
dataset.clear();
// Save the classifier
IOUtils.writeObjectToFile(new KBPStatisticalExtractor(classifier), MODEL_FILE);
}
// Read either a newly-trained or pre-trained model.
// The serialized object may be either a bare Classifier or a full
// KBPStatisticalExtractor, so branch on the runtime type below.
Object model = IOUtils.readObjectFromURLOrClasspathOrFileSystem(MODEL_FILE);
KBPStatisticalExtractor classifier;
if (model instanceof Classifier) {
// noinspection unchecked
classifier = new KBPStatisticalExtractor((Classifier<String, String>) model);
} else {
classifier = ((KBPStatisticalExtractor) model);
}
// Evaluate the model
// NOTE(review): the PrintStream opened for a non-stdout PREDICTIONS target
// is presumably closed (or flushed at JVM exit) by computeAccuracy — confirm.
classifier.computeAccuracy(testExamples.stream(), PREDICTIONS.map(x -> {
try {
return "stdout".equalsIgnoreCase(x) ? System.out : new PrintStream(new FileOutputStream(x));
} catch (IOException e) {
// Rethrow unchecked: checked IOException cannot escape the lambda.
throw new RuntimeIOException(e);
}
}));
}
Aggregations