Example use of java.io.BufferedReader in the CoreNLP project (stanfordnlp): class AceToken, method loadDictionary.
/**
 * Loads one dictionary from disk into the given map.
 * Each line is tokenized; the first token (lower-cased) becomes the key.
 * A single-token line maps to the literal "true" (membership flag);
 * otherwise the second token is stored as the value.
 *
 * @param dict destination map, modified in place
 * @param file path of the dictionary file to read
 * @throws java.io.IOException if the file cannot be opened or read
 */
private static void loadDictionary(Map<String, String> dict, String file) throws java.io.FileNotFoundException, java.io.IOException {
  // try-with-resources guarantees the reader is closed; the original leaked it
  try (BufferedReader in = new BufferedReader(new FileReader(file))) {
    String line;
    while ((line = in.readLine()) != null) {
      ArrayList<String> tokens = SimpleTokenize.tokenize(line);
      if (!tokens.isEmpty()) {
        String lower = tokens.get(0).toLowerCase();
        if (tokens.size() == 1) {
          // no explicit value: treat as a boolean membership entry
          dict.put(lower, "true");
        } else {
          dict.put(lower, tokens.get(1));
        }
      }
    }
  }
}
Example use of java.io.BufferedReader in the CoreNLP project (stanfordnlp): class AceToken, method loadProximityClasses.
/**
 * Loads all proximity classes from disk. The WORDS map must be
 * populated before this is called, since tokens are resolved through it.
 * A missing proximity database is not fatal: a warning is logged and the
 * method returns without modifying PROX_CLASSES.
 *
 * @param proxFileName path of the proximity-class file
 * @throws java.io.IOException if an error occurs while reading an opened file
 */
public static void loadProximityClasses(String proxFileName) throws java.io.IOException {
  log.info("Loading proximity classes...");
  BufferedReader in;
  try {
    in = new BufferedReader(new FileReader(proxFileName));
  } catch (java.io.IOException e) {
    // best-effort: proximity features are simply unavailable without the file
    log.info("Warning: no proximity database found.");
    return;
  }
  try {
    String line;
    while ((line = in.readLine()) != null) {
      ArrayList<String> tokens = SimpleTokenize.tokenize(line);
      if (!tokens.isEmpty()) {
        // first token is the head word of the class
        // NOTE(review): key is null if the word is absent from WORDS — confirm intended
        Integer key = WORDS.get(tokens.get(0));
        ArrayList<Integer> value = new ArrayList<>();
        // the class includes the head word itself (loop starts at index 0),
        // capped at PROXIMITY_CLASS_SIZE entries
        for (int i = 0; i < tokens.size() && i < PROXIMITY_CLASS_SIZE; i++) {
          value.add(WORDS.get(tokens.get(i)));
        }
        PROX_CLASSES.put(key, value);
      }
    }
  } finally {
    // the original only closed on the success path, leaking the reader on error
    in.close();
  }
  log.info("Finished loading proximity classes.");
}
Example use of java.io.BufferedReader in the CoreNLP project (stanfordnlp): class RobustTokenizer, method main.
/**
 * Command-line entry point: tokenizes the single file named on the command
 * line and prints one token per line to stdout.
 *
 * @param argv exactly one argument, the path of the file to tokenize
 * @throws Exception on any I/O or tokenization failure
 */
public static void main(String[] argv) throws Exception {
  if (argv.length != 1) {
    log.info("Usage: java edu.stanford.nlp.ie.machinereading.common.RobustTokenizer <file to tokenize>");
    System.exit(1);
  }
  // slurp the whole file into memory; try-with-resources closes the reader
  // (the original never closed it)
  StringBuilder buffer = new StringBuilder();
  try (BufferedReader is = new BufferedReader(new FileReader(argv[0]))) {
    int ch;
    while ((ch = is.read()) != -1) {
      buffer.append((char) ch);
    }
  }
  // tokenize the buffered text
  RobustTokenizer<Word> t = new RobustTokenizer<>(buffer.toString());
  List<Word> tokens = t.tokenize();
  for (Word token : tokens) {
    System.out.println(token);
  }
}
Example use of java.io.BufferedReader in the CoreNLP project (stanfordnlp): class AceDocument, method readRawBytes.
/**
 * Reads the entire file, character by character, into {@code mRawBuffer}.
 * NOTE(review): FileReader uses the platform default charset — confirm this
 * matches the encoding of the raw ACE source files.
 *
 * @param fileName path of the file to read
 * @throws IOException if the file cannot be opened or read
 */
private void readRawBytes(String fileName) throws IOException {
  StringBuilder buf = new StringBuilder();
  // try-with-resources closes the reader even if read() throws
  // (the original skipped close() on error)
  try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
    int c;
    while ((c = in.read()) >= 0) {
      buf.append((char) c);
    }
  }
  mRawBuffer = buf.toString();
}
Example use of java.io.BufferedReader in the CoreNLP project (stanfordnlp): class SplitCanditoTrees, method readTrees.
/**
 * Reads all trees from the given Candito XML files.
 * Each tree is keyed by "&lt;file basename without extension&gt;-&lt;sentence id&gt;".
 *
 * @param filenames paths of the XML treebank files (ISO-8859-1 encoded)
 * @return map from tree id to parsed tree
 * @throws IOException if a file cannot be opened or read
 */
static Map<String, Tree> readTrees(String[] filenames) throws IOException {
  // TODO: perhaps we can just pass in CC_TAGSET and get rid of replacePOSTags
  // need to test that
  final TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
  Map<String, Tree> treeMap = Generics.newHashMap();
  for (String filename : filenames) {
    File file = new File(filename);
    // strip the extension; assumes the filename contains a '.'
    String canonicalFilename = file.getName().substring(0, file.getName().lastIndexOf('.'));
    FrenchXMLTreeReader tr = (FrenchXMLTreeReader) trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO8859_1")));
    try {
      Tree t;
      int numTrees;
      for (numTrees = 0; (t = tr.readTree()) != null; numTrees++) {
        String id = canonicalFilename + "-" + ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
        treeMap.put(id, t);
      }
      System.err.printf("%s: %d trees%n", file.getName(), numTrees);
    } finally {
      // the original leaked the reader if readTree() threw
      tr.close();
    }
  }
  return treeMap;
}
Aggregations