Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
In the class CRFClassifier, method readEntityMatrices:
static Pair<double[][], double[][]> readEntityMatrices(String fileName, Index<String> tagIndex) {
  int numTags = tagIndex.size();
  int matrixSize = numTags - 1;
  String[] matrixLines = new String[matrixSize];
  String[] subMatrixLines = new String[matrixSize];
  try (BufferedReader br = IOUtils.readerFromString(fileName)) {
    int lineCount = 0;
    for (String line; (line = br.readLine()) != null; ) {
      line = line.trim();
      if (lineCount < matrixSize)
        matrixLines[lineCount] = line;
      else
        subMatrixLines[lineCount - matrixSize] = line;
      lineCount++;
    }
  } catch (Exception ex) {
    throw new RuntimeIOException(ex);
  }
  double[][] matrix = parseMatrix(matrixLines, tagIndex, matrixSize, true);
  double[][] subMatrix = parseMatrix(subMatrixLines, tagIndex, matrixSize, true);
  // In Jenny's paper, use the square root of non-log prob for matrix, but not for subMatrix
  for (int i = 0; i < matrix.length; i++) {
    for (int j = 0; j < matrix[i].length; j++) matrix[i][j] = matrix[i][j] / 2;
  }
  log.info("Matrix: ");
  log.info(ArrayUtils.toString(matrix));
  log.info("SubMatrix: ");
  log.info(ArrayUtils.toString(subMatrix));
  return new Pair<>(matrix, subMatrix);
}
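The pattern of interest here is the try-with-resources read loop whose checked IOException is rethrown as the unchecked RuntimeIOException, so the method signature needs no throws clause. A minimal standalone sketch of that idiom, using standard JDK file reading (the class name MatrixFileReader and the method readTrimmedLines are hypothetical, not CoreNLP API):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.List;

import edu.stanford.nlp.io.RuntimeIOException;

public class MatrixFileReader {
  // reads all non-empty, trimmed lines of a file, rethrowing any IOException unchecked
  public static List<String> readTrimmedLines(String fileName) {
    List<String> lines = new ArrayList<>();
    try (BufferedReader br = Files.newBufferedReader(Paths.get(fileName))) {
      for (String line; (line = br.readLine()) != null; ) {
        line = line.trim();
        if (!line.isEmpty()) {
          lines.add(line);
        }
      }
    } catch (IOException e) {
      // convert the checked exception so callers need not declare throws IOException
      throw new RuntimeIOException(e);
    }
    return lines;
  }
}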
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
In the class SplittingGrammarExtractor, method useNewBetas:
public boolean useNewBetas(boolean testConverged, TwoDimensionalMap<String, String, double[][]> tempUnaryBetas, ThreeDimensionalMap<String, String, String, double[][][]> tempBinaryBetas) {
  rescaleTemporaryBetas(tempUnaryBetas, tempBinaryBetas);
  // if we just split states, we have obviously not converged
  boolean converged = testConverged && testConvergence(tempUnaryBetas, tempBinaryBetas);
  unaryBetas = tempUnaryBetas;
  binaryBetas = tempBinaryBetas;
  wordIndex = tempWordIndex;
  tagIndex = tempTagIndex;
  lex = tempLex;
  if (DEBUG()) {
    System.out.println("LEXICON");
    try {
      OutputStreamWriter osw = new OutputStreamWriter(System.out, "utf-8");
      lex.writeData(osw);
      osw.flush();
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }
  tempWordIndex = null;
  tempTagIndex = null;
  tempLex = null;
  return converged;
}
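The debug branch above writes the lexicon through a UTF-8 OutputStreamWriter wrapped around System.out, flushes it without closing it, and converts any IOException into a RuntimeIOException. A small sketch of just that output pattern (DebugDump and dump are hypothetical names, not part of SplittingGrammarExtractor):

import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.nio.charset.StandardCharsets;

import edu.stanford.nlp.io.RuntimeIOException;

public class DebugDump {
  // writes debug text to System.out through a UTF-8 Writer, rethrowing IOException unchecked
  public static void dump(String text) {
    try {
      Writer osw = new OutputStreamWriter(System.out, StandardCharsets.UTF_8);
      osw.write(text);
      // flush rather than close, so System.out remains usable afterwards
      osw.flush();
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }
}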
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
In the class CoNLLDocumentReader, method getNextDocument:
public CoNLLDocument getNextDocument() {
  try {
    // DONE!
    if (curFileIndex >= fileList.size())
      return null;
    File curFile = fileList.get(curFileIndex);
    if (docIterator == null) {
      docIterator = new DocumentIterator(curFile.getAbsolutePath(), options);
    }
    while (!docIterator.hasNext()) {
      Redwood.log("debug-docreader", "Processed " + docIterator.docCnt + " documents in " + curFile.getAbsolutePath());
      docIterator.close();
      curFileIndex++;
      if (curFileIndex >= fileList.size()) {
        // DONE!
        return null;
      }
      curFile = fileList.get(curFileIndex);
      docIterator = new DocumentIterator(curFile.getAbsolutePath(), options);
    }
    CoNLLDocument next = docIterator.next();
    Redwood.log("debug-docreader", "Reading document: " + next.getDocumentID() + " part: " + next.getPartNo());
    return next;
  } catch (IOException ex) {
    throw new RuntimeIOException(ex);
  }
}
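getNextDocument walks a list of files, advancing to the next file whenever the current per-file iterator is exhausted, and surfaces any IOException from the underlying readers as a RuntimeIOException. A simplified sketch of the same multi-file iteration pattern using plain line reading (MultiFileLineReader is a hypothetical class, not part of CoNLLDocumentReader):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.List;

import edu.stanford.nlp.io.RuntimeIOException;

public class MultiFileLineReader {
  private final List<Path> fileList;
  private int curFileIndex = 0;
  private BufferedReader current = null;

  public MultiFileLineReader(List<Path> fileList) {
    this.fileList = fileList;
  }

  // returns the next line across all files, or null when every file is exhausted
  public String getNextLine() {
    try {
      while (true) {
        if (current == null) {
          if (curFileIndex >= fileList.size()) {
            return null;  // done
          }
          current = Files.newBufferedReader(fileList.get(curFileIndex));
        }
        String line = current.readLine();
        if (line != null) {
          return line;
        }
        // current file exhausted: close it and move on to the next one
        current.close();
        current = null;
        curFileIndex++;
      }
    } catch (IOException ex) {
      throw new RuntimeIOException(ex);
    }
  }
}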
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
In the class Dictionaries, method loadDemonymLists:
/**
 * The format of the demonyms file is
 * countryCityOrState ( TAB demonym )*
 * Lines starting with # are ignored.
 * The file is cased but stored in in-memory data structures uncased.
 * The results are:
 * demonyms is a hash from each country (etc.) to a set of demonymic Strings;
 * adjectiveNation is a set of demonymic Strings;
 * demonymSet has all country (etc.) names and all demonymic Strings.
 */
private void loadDemonymLists(String demonymFile) {
  try (BufferedReader reader = IOUtils.readerFromString(demonymFile)) {
    for (String line; (line = reader.readLine()) != null; ) {
      line = line.toLowerCase(Locale.ENGLISH);
      String[] tokens = line.split("\t");
      if (tokens[0].startsWith("#"))
        continue;
      Set<String> set = Generics.newHashSet();
      for (String s : tokens) {
        set.add(s);
        demonymSet.add(s);
      }
      demonyms.put(tokens[0], set);
    }
    adjectiveNation.addAll(demonymSet);
    adjectiveNation.removeAll(demonyms.keySet());
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
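The loader above reads TAB-separated lines, skips comment lines beginning with '#', lowercases everything, and maps the first token to the set of all tokens on the line, again converting the checked IOException into a RuntimeIOException. A self-contained sketch of that parsing step using standard collections instead of Generics (DemonymParser and parse are hypothetical names):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Locale;
import java.util.Map;
import java.util.Set;

import edu.stanford.nlp.io.RuntimeIOException;

public class DemonymParser {
  // parses "countryCityOrState (TAB demonym)*" lines, skipping comment lines that start with '#'
  public static Map<String, Set<String>> parse(String fileName) {
    Map<String, Set<String>> demonyms = new HashMap<>();
    try (BufferedReader reader = Files.newBufferedReader(Paths.get(fileName))) {
      for (String line; (line = reader.readLine()) != null; ) {
        line = line.toLowerCase(Locale.ENGLISH);
        String[] tokens = line.split("\t");
        if (tokens[0].startsWith("#")) {
          continue;
        }
        // as in the loader above, the country name itself is a member of its own demonym set
        demonyms.put(tokens[0], new HashSet<>(Arrays.asList(tokens)));
      }
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
    return demonyms;
  }
}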
Use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
In the class Dictionaries, method loadAnimacyLists:
private void loadAnimacyLists(String animateWordsFile, String inanimateWordsFile) {
  try {
    getWordsFromFile(animateWordsFile, animateWords, false);
    getWordsFromFile(inanimateWordsFile, inanimateWords, false);
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  }
}
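Here the checked IOException from the word-list helper is confined to one small try block and surfaced as a RuntimeIOException. A sketch of the same shape with a hypothetical helper (WordListLoader, loadWords, and loadAnimacy are illustrative names; the actual signature of getWordsFromFile is not reproduced here):

import java.io.BufferedReader;
import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.Set;

import edu.stanford.nlp.io.RuntimeIOException;

public class WordListLoader {
  // hypothetical helper: reads one word per line into the given set and declares the checked exception
  static void loadWords(String fileName, Set<String> target) throws IOException {
    try (BufferedReader br = Files.newBufferedReader(Paths.get(fileName))) {
      for (String line; (line = br.readLine()) != null; ) {
        String word = line.trim();
        if (!word.isEmpty()) {
          target.add(word);
        }
      }
    }
  }

  // the caller keeps an unchecked signature by wrapping the IOException, as loadAnimacyLists does
  public static void loadAnimacy(String animateFile, String inanimateFile,
                                 Set<String> animateWords, Set<String> inanimateWords) {
    try {
      loadWords(animateFile, animateWords);
      loadWords(inanimateFile, inanimateWords);
    } catch (IOException e) {
      throw new RuntimeIOException(e);
    }
  }
}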