Search in sources :

Example 71 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class AceToken method loadDictionary.

/**
 * Loads one dictionary from disk.
 *
 * <p>Each line of the file is split with {@code SimpleTokenize.tokenize}. Lines
 * that yield no tokens are skipped. The lower-cased first token becomes the
 * key; the value is the second token, or the literal {@code "true"} when the
 * line has only one token. Later lines overwrite earlier entries with the
 * same key.
 *
 * @param dict map that receives the entries read from the file
 * @param file path of the dictionary file to read
 * @throws java.io.FileNotFoundException if the file cannot be opened
 * @throws java.io.IOException if reading the file fails
 */
private static void loadDictionary(Map<String, String> dict, String file) throws java.io.FileNotFoundException, java.io.IOException {
    // try-with-resources closes the reader on both normal and exceptional exit;
    // the previous version never closed it and leaked a file handle per call.
    try (BufferedReader in = new BufferedReader(new FileReader(file))) {
        String line;
        while ((line = in.readLine()) != null) {
            ArrayList<String> tokens = SimpleTokenize.tokenize(line);
            if (tokens.isEmpty()) {
                continue;
            }
            String lower = tokens.get(0).toLowerCase();
            // A single-token line marks plain membership, recorded as "true".
            String value = (tokens.size() == 1) ? "true" : tokens.get(1);
            dict.put(lower, value);
        }
    }
}
Also used : BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)

Example 72 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class AceToken method loadProximityClasses.

/**
 * Loads all proximity classes from disk. The WORDS map must be created
 * before calling this method, because every token is looked up in it.
 *
 * <p>Each non-empty line is tokenized with {@code SimpleTokenize.tokenize}.
 * The WORDS entry of the first token is the key; the value is the list of
 * WORDS entries for the first {@code PROXIMITY_CLASS_SIZE} tokens of the line
 * (the first token included). Tokens missing from WORDS contribute
 * {@code null}, exactly as {@code WORDS.get} returns it.
 *
 * <p>If the file cannot be opened, a warning is logged and the method returns
 * without loading anything.
 *
 * @param proxFileName path of the proximity class file
 * @throws java.io.IOException if reading the opened file fails
 */
public static void loadProximityClasses(String proxFileName) throws java.io.IOException {
    log.info("Loading proximity classes...");
    final BufferedReader opened;
    try {
        opened = new BufferedReader(new FileReader(proxFileName));
    } catch (java.io.IOException e) {
        // A missing proximity database is deliberately non-fatal.
        log.info("Warning: no proximity database found.");
        return;
    }
    // try-with-resources closes the reader even when readLine() throws;
    // the previous version only closed it on the success path.
    try (BufferedReader in = opened) {
        String line;
        while ((line = in.readLine()) != null) {
            ArrayList<String> tokens = SimpleTokenize.tokenize(line);
            if (tokens.isEmpty()) {
                continue;
            }
            Integer key = WORDS.get(tokens.get(0));
            ArrayList<Integer> value = new ArrayList<>();
            // Keep at most PROXIMITY_CLASS_SIZE members per class.
            int limit = Math.min(tokens.size(), PROXIMITY_CLASS_SIZE);
            for (int i = 0; i < limit; i++) {
                value.add(WORDS.get(tokens.get(i)));
            }
            PROX_CLASSES.put(key, value);
        }
    }
    log.info("Finished loading proximity classes.");
}
Also used : BufferedReader(java.io.BufferedReader) ArrayList(java.util.ArrayList) FileReader(java.io.FileReader)

Example 73 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class RobustTokenizer method main.

/**
 * Command-line entry point: reads the file named by the single argument,
 * tokenizes its entire content with a {@code RobustTokenizer}, and prints
 * one token per line to standard output.
 *
 * <p>If the argument count is not exactly one, a usage message is logged and
 * the JVM exits with status 1.
 *
 * @param argv command-line arguments; must contain exactly the file to tokenize
 * @throws Exception if the file cannot be read or tokenization fails
 */
public static void main(String[] argv) throws Exception {
    if (argv.length != 1) {
        log.info("Usage: java edu.stanford.nlp.ie.machinereading.common.RobustTokenizer <file to tokenize>");
        System.exit(1);
    }
    // Read the whole file into a buffer. The buffer is local to this method,
    // so the unsynchronized StringBuilder is sufficient.
    StringBuilder buffer = new StringBuilder();
    // try-with-resources closes the reader; the previous version never did.
    try (BufferedReader is = new BufferedReader(new FileReader(argv[0]))) {
        int ch;
        while ((ch = is.read()) != -1) {
            buffer.append((char) ch);
        }
    }
    // Create the tokenizer over the full text and print every token.
    RobustTokenizer<Word> t = new RobustTokenizer<>(buffer.toString());
    List<Word> tokens = t.tokenize();
    for (Word token : tokens) {
        System.out.println(token);
    }
}
Also used : Word(edu.stanford.nlp.ling.Word) BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)

Example 74 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class AceDocument method readRawBytes.

/**
 * Reads the entire content of a file, character by character, and stores it
 * in {@code mRawBuffer}.
 *
 * <p>The file is decoded by {@link FileReader}, i.e. with the reader's default
 * charset. {@code mRawBuffer} is only assigned once the whole file has been
 * read successfully.
 *
 * @param fileName path of the file to read
 * @throws IOException if the file cannot be opened or read
 */
private void readRawBytes(String fileName) throws IOException {
    // Local buffer, so the unsynchronized StringBuilder is sufficient.
    StringBuilder buf = new StringBuilder();
    // try-with-resources closes the reader even when read() throws;
    // the previous version skipped close() on that path.
    try (BufferedReader in = new BufferedReader(new FileReader(fileName))) {
        int c;
        while ((c = in.read()) >= 0) {
            buf.append((char) c);
        }
    }
    mRawBuffer = buf.toString();
}
Also used : BufferedReader(java.io.BufferedReader) FileReader(java.io.FileReader)

Example 75 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class SplitCanditoTrees method readTrees.

/**
 * Reads every tree from the given files into a single map.
 *
 * <p>Each file is decoded as ISO8859_1 and parsed with a
 * {@code FrenchXMLTreeReader}. A tree is stored under the key
 * {@code <file name without extension>-<sentence id>}, where the sentence id
 * is the {@code CoreAnnotations.SentenceIDAnnotation} value of the tree's root
 * label. A later tree with the same key replaces the earlier one. After each
 * file, the number of trees read from it is printed to standard error.
 *
 * @param filenames paths of the tree files to read
 * @return map from tree id to tree, covering all the given files
 * @throws IOException if a file cannot be opened or read
 */
static Map<String, Tree> readTrees(String[] filenames) throws IOException {
    // TODO: perhaps we can just pass in CC_TAGSET and get rid of replacePOSTags
    // need to test that
    final TreeReaderFactory trf = new FrenchXMLTreeReaderFactory(false);
    Map<String, Tree> treeMap = Generics.newHashMap();
    for (String filename : filenames) {
        File file = new File(filename);
        String baseName = file.getName();
        // Strip the extension when there is one. A name without '.' used to
        // make substring(0, -1) throw StringIndexOutOfBoundsException.
        int dot = baseName.lastIndexOf('.');
        String canonicalFilename = (dot >= 0) ? baseName.substring(0, dot) : baseName;
        FrenchXMLTreeReader tr = (FrenchXMLTreeReader) trf.newTreeReader(new BufferedReader(new InputStreamReader(new FileInputStream(file), "ISO8859_1")));
        int numTrees = 0;
        try {
            for (Tree t = tr.readTree(); t != null; t = tr.readTree()) {
                String id = canonicalFilename + "-" + ((CoreLabel) t.label()).get(CoreAnnotations.SentenceIDAnnotation.class);
                treeMap.put(id, t);
                numTrees++;
            }
        } finally {
            // Close the reader even when parsing fails part-way through,
            // so the underlying file stream is not leaked.
            tr.close();
        }
        System.err.printf("%s: %d trees%n", file.getName(), numTrees);
    }
    return treeMap;
}
Also used : FrenchXMLTreeReader(edu.stanford.nlp.trees.international.french.FrenchXMLTreeReader) InputStreamReader(java.io.InputStreamReader) FrenchXMLTreeReaderFactory(edu.stanford.nlp.trees.international.french.FrenchXMLTreeReaderFactory) BufferedReader(java.io.BufferedReader) Tree(edu.stanford.nlp.trees.Tree) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory) FrenchXMLTreeReaderFactory(edu.stanford.nlp.trees.international.french.FrenchXMLTreeReaderFactory) File(java.io.File) FileInputStream(java.io.FileInputStream)

Aggregations

BufferedReader (java.io.BufferedReader)5548 InputStreamReader (java.io.InputStreamReader)3430 IOException (java.io.IOException)2601 FileReader (java.io.FileReader)1283 File (java.io.File)942 InputStream (java.io.InputStream)845 ArrayList (java.util.ArrayList)766 FileInputStream (java.io.FileInputStream)694 URL (java.net.URL)526 Test (org.junit.Test)447 FileNotFoundException (java.io.FileNotFoundException)380 StringReader (java.io.StringReader)340 BufferedWriter (java.io.BufferedWriter)242 HashMap (java.util.HashMap)232 HttpURLConnection (java.net.HttpURLConnection)231 Matcher (java.util.regex.Matcher)223 OutputStreamWriter (java.io.OutputStreamWriter)212 PrintWriter (java.io.PrintWriter)208 URLConnection (java.net.URLConnection)208 Reader (java.io.Reader)201