use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class Dictionaries method loadGenderNumber.
/**
* Load Bergsma and Lin (2006) gender and number list.
* <br>
* The list is converted from raw text and numbers to a serialized
* map, which saves quite a bit of time loading.
* See edu.stanford.nlp.dcoref.util.ConvertGenderFile
*/
/*
private void loadGenderNumber(String file, String neutralWordsFile) {
try {
getWordsFromFile(neutralWordsFile, neutralWords, false);
Map<List<String>, Gender> temp = IOUtils.readObjectFromURLOrClasspathOrFileSystem(file);
genderNumber.putAll(temp);
} catch (IOException e) {
throw new RuntimeIOException(e);
} catch (ClassNotFoundException e) {
throw new RuntimeIOException(e);
}
}
*/
/**
 * Load Bergsma and Lin (2006) gender and number list.
 * <br>
 * Every line is split on a tab into a phrase and a count field; the count
 * field is split on spaces into male, female and neutral counts. A phrase is
 * recorded in {@code genderNumber} only when one count is greater than 2 and
 * half of it exceeds the sum of the other two; all other phrases are skipped.
 * The neutral-words file is loaded into {@code neutralWords} first.
 *
 * @param file location of the gender/number list, resolved by {@code IOUtils.readerFromString}
 * @param neutralWordsFile location of the neutral-words list
 * @throws RuntimeIOException wrapping any IOException raised while reading
 */
private void loadGenderNumber(String file, String neutralWordsFile) {
  try (BufferedReader in = IOUtils.readerFromString(file)) {
    getWordsFromFile(neutralWordsFile, neutralWords, false);
    // Scratch buffers shared by all lines; splitOnChar fills them in place.
    String[] columns = new String[2];
    String[] counts = new String[3];
    String row;
    while ((row = in.readLine()) != null) {
      StringUtils.splitOnChar(columns, row, '\t');
      StringUtils.splitOnChar(counts, columns[1], ' ');
      int maleCount = Integer.parseInt(counts[0]);
      int femaleCount = Integer.parseInt(counts[1]);
      int neutralCount = Integer.parseInt(counts[2]);

      // Pick the gender whose count clearly dominates the other two combined.
      final Gender dominant;
      if (maleCount > 2 && maleCount * 0.5 > femaleCount + neutralCount) {
        dominant = Gender.MALE;
      } else if (femaleCount > 2 && femaleCount * 0.5 > maleCount + neutralCount) {
        dominant = Gender.FEMALE;
      } else if (neutralCount > 2 && neutralCount * 0.5 > maleCount + femaleCount) {
        dominant = Gender.NEUTRAL;
      } else {
        dominant = Gender.UNKNOWN;
      }

      // Phrases without a dominant gender are not stored.
      if (dominant != Gender.UNKNOWN) {
        List<String> phrase = Arrays.asList(columns[0].split(" "));
        genderNumber.put(phrase, dominant);
      }
    }
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe);
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class Dictionaries method loadNumberLists.
/**
 * Loads the plural word list into {@code pluralWords} and then the singular
 * word list into {@code singularWords}, both via {@code getWordsFromFile}.
 *
 * @param pluralWordsFile location of the plural-words list
 * @param singularWordsFile location of the singular-words list
 * @throws RuntimeIOException wrapping any IOException raised by {@code getWordsFromFile}
 */
private void loadNumberLists(String pluralWordsFile, String singularWordsFile) {
  try {
    getWordsFromFile(pluralWordsFile, pluralWords, false);
    getWordsFromFile(singularWordsFile, singularWords, false);
  } catch (IOException ioe) {
    // Surface the failure as the project's unchecked I/O exception.
    throw new RuntimeIOException(ioe);
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class RVFDataset method readSVMLightFormat.
/**
 * Reads a file in SVMLight format into a new RVFDataset, one datum per line.
 *
 * @param filename location of the data, resolved by {@code IOUtils.readerFromString}
 * @param featureIndex feature index handed to the new dataset
 * @param labelIndex label index handed to the new dataset
 * @param lines if non-null, every raw line read is appended to this list
 * @return the dataset built from all lines of the input
 * @throws RuntimeIOException wrapping any IOException raised while reading
 */
private static RVFDataset<String, String> readSVMLightFormat(String filename, Index<String> featureIndex, Index<String> labelIndex, List<String> lines) {
  BufferedReader in = null;
  RVFDataset<String, String> dataset;
  try {
    dataset = new RVFDataset<>(10, featureIndex, labelIndex);
    in = IOUtils.readerFromString(filename);
    // Loop until readLine() reports end of stream. Reader.ready() only says
    // whether a read would block, so using it as the loop condition could stop
    // early on a slow source and silently truncate the dataset.
    for (String line; (line = in.readLine()) != null; ) {
      if (lines != null) {
        lines.add(line);
      }
      dataset.add(svmLightLineToRVFDatum(line));
    }
  } catch (IOException e) {
    throw new RuntimeIOException(e);
  } finally {
    // Best-effort close: a failure while closing must not mask the result.
    IOUtils.closeIgnoringExceptions(in);
  }
  return dataset;
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class CTBunkDict method readCTBunkDict.
/**
 * Rebuilds {@code CTBunk_dict} from a GB18030-encoded file. Each line is
 * split on single spaces; the second field is used as the tag and the first
 * field is added to the set of words stored under that tag.
 *
 * @param filename path of the dictionary file on the local file system
 * @throws RuntimeIOException if the file is missing or cannot be read
 */
private static void readCTBunkDict(String filename) {
  CTBunk_dict = Generics.newHashMap();
  // try-with-resources closes the reader (and the underlying file stream)
  // on every exit path; previously the reader was never closed.
  try (BufferedReader CTBunkDetectorReader = new BufferedReader(new InputStreamReader(new FileInputStream(filename), "GB18030"))) {
    for (String CTBunkDetectorLine; (CTBunkDetectorLine = CTBunkDetectorReader.readLine()) != null; ) {
      String[] fields = CTBunkDetectorLine.split(" ");
      String tag = fields[1];
      Set<String> words = CTBunk_dict.get(tag);
      if (words == null) {
        // First word seen for this tag: create its set.
        words = Generics.newHashSet();
        CTBunk_dict.put(tag, words);
      }
      words.add(fields[0]);
    }
  } catch (FileNotFoundException e) {
    throw new RuntimeIOException("CTBunk file not found: " + filename, e);
  } catch (IOException e) {
    throw new RuntimeIOException("CTBunk I/O error: " + filename, e);
  }
}
use of edu.stanford.nlp.io.RuntimeIOException in project CoreNLP by stanfordnlp.
the class MaxentTagger method outputTaggedSentence.
/**
 * Writes one tagged sentence to {@code writer} in the requested output style.
 * <ul>
 * <li>TSV: writes the result of {@code getTsvWords}.</li>
 * <li>XML and INLINE_XML: delegates to {@code writeXMLSentence}.</li>
 * <li>SLASH_TAGS: writes the sentence joined with the configured tag
 *     separator, followed by {@code separator}.</li>
 * </ul>
 *
 * @param sentence the tagged words to output
 * @param outputLemmas forwarded to the TSV and XML writers
 * @param outputStyle which format to emit
 * @param outputVerbosity forwarded to {@code getTsvWords}
 * @param numSentences forwarded to {@code writeXMLSentence}
 * @param separator text written after the sentence in SLASH_TAGS style
 * @param writer destination of the output
 * @throws IllegalArgumentException if the output style is not one of the handled cases
 * @throws RuntimeIOException wrapping any IOException raised while writing
 */
public void outputTaggedSentence(List<? extends HasWord> sentence, boolean outputLemmas, OutputStyle outputStyle, boolean outputVerbosity, int numSentences, String separator, Writer writer) {
  try {
    switch (outputStyle) {
      case XML:
      case INLINE_XML: {
        writeXMLSentence(writer, sentence, numSentences, outputLemmas);
        break;
      }
      case TSV: {
        writer.write(getTsvWords(outputVerbosity, outputLemmas, sentence));
        break;
      }
      case SLASH_TAGS: {
        String tagSeparator = config.getTagSeparator();
        writer.write(SentenceUtils.listToString(sentence, false, tagSeparator));
        writer.write(separator);
        break;
      }
      default: {
        throw new IllegalArgumentException("Unsupported output style " + outputStyle);
      }
    }
  } catch (IOException ioe) {
    throw new RuntimeIOException(ioe);
  }
}
Aggregations