Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class PatternsSimpleThreadedITest, method setUp:
@BeforeClass
public static void setUp() {
  nlpPipeline = new AnnotationPipeline();
  // We assume the input is already tokenized, so we use a cheap whitespace tokenizer.
  // The original code uses this property for the tokenizer:
  //   props.setProperty("tokenize.options", "ptb3Escaping=false,normalizeParentheses=false,escapeForwardSlashAsterisk=false");
  nlpPipeline.addAnnotator(new TokenizerAnnotator(false, TokenizerType.Whitespace));
  nlpPipeline.addAnnotator(new WordsToSentencesAnnotator(false));
  nlpPipeline.addAnnotator(new POSTaggerAnnotator());
  nlpPipeline.addAnnotator(new MorphaAnnotator(false));
  Properties nerAnnotatorProperties = new Properties();
  nerAnnotatorProperties.setProperty("ner.useSUTime", Boolean.toString(false));
  nerAnnotatorProperties.setProperty("ner.applyFineGrained", Boolean.toString(false));
  // nerAnnotatorProperties.setProperty("ner.fine.regexner.mapping", spiedProperties.getProperty("fineGrainedRegexnerMapping"));
  try {
    nlpPipeline.addAnnotator(new NERCombinerAnnotator(nerAnnotatorProperties));
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
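For context, a pipeline assembled this way is driven through the standard CoreNLP Annotation API. A minimal sketch of a caller (the input text is illustrative; nlpPipeline is the field initialized in setUp above):

import edu.stanford.nlp.ling.CoreAnnotations;
import edu.stanford.nlp.ling.CoreLabel;
import edu.stanford.nlp.pipeline.Annotation;
import edu.stanford.nlp.util.CoreMap;

// Annotate a short pre-tokenized text and print word/POS/NER for each token.
Annotation doc = new Annotation("Measles is a vaccine preventable disease .");
nlpPipeline.annotate(doc);
for (CoreMap sentence : doc.get(CoreAnnotations.SentencesAnnotation.class)) {
  for (CoreLabel token : sentence.get(CoreAnnotations.TokensAnnotation.class)) {
    System.out.printf("%s/%s/%s%n", token.word(), token.tag(), token.ner());
  }
}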
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class PatternsSimpleThreadedITest, method runTest:
void runTest(String numThreads) {
  Properties spiedProperties = new Properties();
  final Path tempPath;
  try {
    tempPath = Files.createTempDirectory(null);
    spiedProperties.load(new InputStreamReader(
        new FileInputStream(new File("data/edu/stanford/nlp/patterns/patterns_itest.properties")),
        StandardCharsets.UTF_8));
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
  Path outputPath = Paths.get(tempPath.toString(), "output");
  Path modelPath = Paths.get(tempPath.toString(), "model");
  Path docsentsPath = Paths.get(tempPath.toString(), "docsents.ser");
  System.out.println("Test " + numThreads + " writing to " + tempPath);
  spiedProperties.setProperty("seedWordsFiles",
      "VACCINE_PREVENTABLE_DISEASE,data/edu/stanford/nlp/patterns/VACCINE_PREVENTABLE_DISEASE.txt");
  // We generate this file below
  spiedProperties.setProperty("file", docsentsPath.toString());
  spiedProperties.setProperty("fileFormat", "ser");
  spiedProperties.setProperty("outDir", outputPath.toString());
  spiedProperties.setProperty("patternsWordsDir", modelPath.toString());
  spiedProperties.setProperty("loadSavedPatternsWordsDir", Boolean.toString(false));
  spiedProperties.setProperty("numThreads", numThreads);
  // Run the pipeline on an input document.
  // Algorithm based on
  // https://github.com/stanfordnlp/CoreNLP/blob/a9a4c2d75b177790a24c0f46188810668d044cd8/src/edu/stanford/nlp/patterns/GetPatternsFromDataMultiClass.java#L702
  // useTargetParserParentRestriction is false
  final Annotation document = new Annotation(
      "** If you survive measles without complications ** I love these . "
      + "Why would n't you survive without complications , Immunologist ?");
  nlpPipeline.annotate(document);
  // Convert the annotation to a map for serialization, as in the original algorithm.
  int i = 0;
  final Map<String, DataInstance> sentenceMap = new HashMap<>();
  for (final CoreMap sentence : document.get(SentencesAnnotation.class)) {
    sentenceMap.put(Integer.toString(i++),
        DataInstance.getNewInstance(PatternFactory.PatternType.SURFACE, sentence));
  }
  try (final ObjectOutputStream sentenceMapStream =
      new ObjectOutputStream(new FileOutputStream(docsentsPath.toString()))) {
    sentenceMapStream.writeObject(sentenceMap);
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
  try {
    GetPatternsFromDataMultiClass.<SurfacePattern>run(spiedProperties);
  } catch (Exception e) {
    System.out.println("Test " + numThreads + " FAILED");
    System.out.println(" Intermediate files in " + tempPath);
    throw new RuntimeException(e);
  }
  System.out.println("Cleaning up temp files from " + tempPath);
  FileSystem.deleteDir(tempPath.toFile());
}
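Since runTest is parameterized by the thread count (as a String, because it is fed straight into the numThreads property), the surrounding test class presumably invokes it once per threading configuration. A hypothetical pair of JUnit test methods (names and counts are illustrative):

import org.junit.Test;

@Test
public void testOneThread() {
  runTest("1"); // single-threaded baseline
}

@Test
public void testFourThreads() {
  runTest("4"); // exercises the multi-threaded code path
}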
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class CoNLL2011DocumentReader, method getNextDocument:
public Document getNextDocument() {
  try {
    if (curFileIndex >= fileList.size()) {
      return null; // DONE!
    }
    File curFile = fileList.get(curFileIndex);
    if (docIterator == null) {
      docIterator = new DocumentIterator(curFile.getAbsolutePath(), options);
    }
    while (!docIterator.hasNext()) {
      logger.info("Processed " + docIterator.docCnt + " documents in " + curFile.getAbsolutePath());
      docIterator.close();
      curFileIndex++;
      if (curFileIndex >= fileList.size()) {
        return null; // DONE!
      }
      curFile = fileList.get(curFileIndex);
      docIterator = new DocumentIterator(curFile.getAbsolutePath(), options);
    }
    Document next = docIterator.next();
    SieveCoreferenceSystem.logger.fine("Reading document: " + next.getDocumentID());
    return next;
  } catch (IOException ex) {
    throw new RuntimeIOException(ex);
  }
}
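Because getNextDocument() returns null at end of input, callers drain the reader with a simple loop. A minimal sketch, assuming the reader can be constructed from a corpus path (the path and constructor arity are assumptions for illustration):

// Drain all documents from a CoNLL-2011 corpus directory.
CoNLL2011DocumentReader reader = new CoNLL2011DocumentReader("/path/to/conll-2011/files");
Document doc;
while ((doc = reader.getNextDocument()) != null) {
  // Process each coreference document here.
  System.out.println(doc.getDocumentID());
}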
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class SieveCoreferenceSystem, method getSingletonPredictorFromSerializedFile:
public static LogisticClassifier<String, String> getSingletonPredictorFromSerializedFile(String serializedFile) {
  try {
    ObjectInputStream ois = IOUtils.readStreamFromString(serializedFile);
    Object o = ois.readObject();
    if (o instanceof LogisticClassifier<?, ?>) {
      return (LogisticClassifier<String, String>) o;
    }
    throw new ClassCastException("Wanted SingletonPredictor, got " + o.getClass());
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  } catch (ClassNotFoundException e) {
    throw new RuntimeException(e);
  }
}
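Loading the predictor is then a one-liner for callers; a minimal sketch (the model path is illustrative):

// Load the serialized singleton predictor once and reuse it.
LogisticClassifier<String, String> predictor =
    SieveCoreferenceSystem.getSingletonPredictorFromSerializedFile("/path/to/singleton.predictor.ser");
// predictor.classOf(datum) can then be used to classify mentions as singletons or not.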
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
The class Dictionaries, method loadDemonymLists:
/**
 * The format of the demonyms file is
 *   countryCityOrState ( TAB demonym )*
 * Lines starting with # are ignored.
 * The file may be cased, but everything is lowercased in the in-memory data structures.
 * The results are:
 *   demonyms is a hash from each country (etc.) to a set of demonymic Strings;
 *   adjectiveNation is a set of demonymic Strings;
 *   demonymSet has all country (etc.) names and all demonymic Strings.
 */
private void loadDemonymLists(String demonymFile) {
  BufferedReader reader = null;
  try {
    reader = IOUtils.readerFromString(demonymFile);
    for (String line; (line = reader.readLine()) != null; ) {
      line = line.toLowerCase(Locale.ENGLISH);
      String[] tokens = line.split("\t");
      if (tokens[0].startsWith("#")) {
        continue;
      }
      Set<String> set = Generics.newHashSet();
      for (String s : tokens) {
        set.add(s);
        demonymSet.add(s);
      }
      demonyms.put(tokens[0], set);
    }
    adjectiveNation.addAll(demonymSet);
    adjectiveNation.removeAll(demonyms.keySet());
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  } finally {
    IOUtils.closeIgnoringExceptions(reader);
  }
}
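For reference, an input file in the format described above might look like this (the entries are illustrative, not taken from the actual CoreNLP data file). Note that the loop stores each line's first token in its own set too, so after loading, demonyms would map "france" to {france, french} and "denmark" to {denmark, danish, dane}, all lowercased:

# countryCityOrState ( TAB demonym )*
France	French
Denmark	Danish	Dane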