Search in sources :

Example 76 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class CoreMapExpressionExtractor method createExtractorFromFiles.

/**
 * Creates an extractor using the specified environment, reading the rules from the given
 * files in the order they are listed. All files feed the same extractor instance.
 *
 * @param env the environment handed to the new {@code CoreMapExpressionExtractor}
 * @param filenames names of the rule files to read; each is opened with
 *     {@code IOUtils.readerFromString}
 * @return the extractor after every file has been parsed into it
 * @throws RuntimeException if opening or parsing any file fails; the message names the
 *     offending file and the original exception is attached as the cause
 */
public static <M extends MatchedExpression> CoreMapExpressionExtractor<M> createExtractorFromFiles(Env env, List<String> filenames) throws RuntimeException {
    CoreMapExpressionExtractor<M> extractor = new CoreMapExpressionExtractor<>(env);
    for (String filename : filenames) {
        BufferedReader br = null;
        try {
            if (verbose) {
                log.info("Reading TokensRegex rules from " + filename);
            }
            br = IOUtils.readerFromString(filename);
            TokenSequenceParser parser = new TokenSequenceParser();
            parser.updateExpressionExtractor(extractor, br);
        } catch (Exception ex) {
            throw new RuntimeException("Error parsing file: " + filename, ex);
        } finally {
            // Close on both the success and the failure path so a parse error does not
            // leak the reader. Close failures stay ignored, as before.
            if (br != null) {
                IOUtils.closeIgnoringExceptions(br);
            }
        }
    }
    return extractor;
}
Also used : BufferedReader(java.io.BufferedReader) TokenSequenceParser(edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParser) ParseException(edu.stanford.nlp.ling.tokensregex.parser.ParseException) IOException(java.io.IOException) TokenSequenceParseException(edu.stanford.nlp.ling.tokensregex.parser.TokenSequenceParseException)

Example 77 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class PhraseTable method readPhrases.

/**
 * Reads phrases from a file, one per line, and registers each with {@code addPhrase}.
 *
 * <p>When {@code checkTag} is set, each line is split into at most two fields with
 * {@code delimiterPattern}: a line with a single field is added as a bare phrase, otherwise
 * the second field is passed along as the second argument of {@code addPhrase}
 * (presumably the phrase's tag; confirm against {@code addPhrase}). When {@code checkTag}
 * is not set, the whole line is the phrase and {@code delimiterPattern} is not used.
 *
 * @param filename file to read, opened with {@code IOUtils.getBufferedFileReader}
 * @param checkTag whether to split each line into phrase and second field
 * @param delimiterPattern separator between the two fields; only used when
 *     {@code checkTag} is true
 * @throws IOException if the file cannot be opened, read or closed
 */
public void readPhrases(String filename, boolean checkTag, Pattern delimiterPattern) throws IOException {
    Timing timer = new Timing();
    timer.doing("Reading phrases: " + filename);
    // try-with-resources: the reader is released even if a line fails to load.
    try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
        String line;
        while ((line = br.readLine()) != null) {
            if (checkTag) {
                // Limit of 2 keeps any further delimiters inside the second field.
                String[] columns = delimiterPattern.split(line, 2);
                if (columns.length == 1) {
                    addPhrase(columns[0]);
                } else {
                    addPhrase(columns[0], columns[1]);
                }
            } else {
                addPhrase(line);
            }
        }
    }
    timer.done();
}
Also used : BufferedReader(java.io.BufferedReader)

Example 78 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class PhraseTable method readPhrasesWithTagScores.

/**
 * Reads phrases together with per-tag scores from a file and registers each phrase with
 * {@code addPhrase(phrase, null, counts)}.
 *
 * <p>Each line is split with {@code fieldDelimiterPattern}. The first field is the phrase;
 * every following field is split once with {@code countDelimiterPattern} into a key and a
 * number, which are stored in a counter for that phrase.
 *
 * @param filename file to read, opened with {@code IOUtils.getBufferedFileReader}
 * @param fieldDelimiterPattern separator between the phrase and the score fields
 * @param countDelimiterPattern separator between key and number inside one score field
 * @throws IOException if the file cannot be opened, read or closed
 * @throws RuntimeException if a score field does not split into exactly two parts or its
 *     number is not parseable; the message carries the file name, the 1-based line number
 *     and the offending line
 */
public void readPhrasesWithTagScores(String filename, Pattern fieldDelimiterPattern, Pattern countDelimiterPattern) throws IOException {
    Timing timer = new Timing();
    timer.doing("Reading phrases: " + filename);
    // try-with-resources: the reader is released even when a malformed line aborts the load.
    try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
        String line;
        int lineno = 0;
        while ((line = br.readLine()) != null) {
            // Count the line before processing it so error messages are 1-based.
            lineno++;
            String[] columns = fieldDelimiterPattern.split(line);
            String phrase = columns[0];
            // Pick map factory to use depending on number of tags we have
            MapFactory<String, MutableDouble> mapFactory = (columns.length < 20) ? MapFactory.<String, MutableDouble>arrayMapFactory() : MapFactory.<String, MutableDouble>linkedHashMapFactory();
            Counter<String> counts = new ClassicCounter<>(mapFactory);
            for (int i = 1; i < columns.length; i++) {
                // Limit of 2 keeps any further delimiters inside the numeric part.
                String[] tagCount = countDelimiterPattern.split(columns[i], 2);
                if (tagCount.length == 2) {
                    try {
                        counts.setCount(tagCount[0], Double.parseDouble(tagCount[1]));
                    } catch (NumberFormatException ex) {
                        throw new RuntimeException("Error processing field " + i + ": '" + columns[i] + "' from (" + filename + ":" + lineno + "): " + line, ex);
                    }
                } else {
                    throw new RuntimeException("Error processing field " + i + ": '" + columns[i] + "' from (" + filename + ":" + lineno + "): " + line);
                }
            }
            addPhrase(phrase, null, counts);
        }
    }
    timer.done();
}
Also used : BufferedReader(java.io.BufferedReader) ClassicCounter(edu.stanford.nlp.stats.ClassicCounter)

Example 79 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class ConfusionMatrixTSV method main.

/**
 * Builds a confusion matrix from a whitespace-separated answers file and prints it to
 * standard output.
 *
 * <p>Each line must split on single whitespace characters into exactly three tokens; the
 * third and second tokens (in that order) are passed to {@code ConfusionMatrix.add}. Lines
 * with any other token count are reported on standard error and skipped.
 *
 * @param args {@code args[0]} is the path of the answers file, read as UTF-8
 */
public static void main(String[] args) {
    if (args.length < 1) {
        // NOTE(review): the usage text names ConfusionMatrix rather than this class
        // (ConfusionMatrixTSV) - confirm which name is intended.
        System.err.printf("Usage: java %s answers_file%n", ConfusionMatrix.class.getName());
        System.exit(-1);
    }
    String answersFile = args[0];
    // try-with-resources: the file handle is released on every exit path.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(answersFile), "UTF-8"))) {
        ConfusionMatrix<String> cm = new ConfusionMatrix<>();
        for (String line = br.readLine(); line != null; line = br.readLine()) {
            String[] tokens = line.split("\\s");
            if (tokens.length != 3) {
                // println so consecutive warnings do not run together on one line.
                System.err.println("ignoring bad line");
                continue;
            }
            cm.add(tokens[2], tokens[1]);
        }
        System.out.println(cm.toString());
    } catch (IOException e) {
        // Also covers FileNotFoundException and UnsupportedEncodingException,
        // which are IOException subclasses.
        e.printStackTrace();
    }
}
Also used : ConfusionMatrix(edu.stanford.nlp.util.ConfusionMatrix) InputStreamReader(java.io.InputStreamReader) BufferedReader(java.io.BufferedReader) FileNotFoundException(java.io.FileNotFoundException) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream)

Example 80 with BufferedReader

use of java.io.BufferedReader in project CoreNLP by stanfordnlp.

the class TreeToTSV method main.

/**
 * Reads trees from a file with a {@code SpanishTreeReaderFactory} reader and prints one
 * {@code word<TAB>label} line per preterminal to standard output, with an empty line after
 * each tree.
 *
 * <p>The label is chosen from a one-character type code. If the value of the node returned
 * by {@code t.ancestor(1, tree)} starts with {@code grup.nom.}, the code is the character
 * right after that prefix; otherwise, if the preterminal's own value matches
 * {@code np0000?}, the code is its last character. Codes map as {@code p -> PERS},
 * {@code l -> LUG}, {@code o -> ORG}, {@code 0 -> OTROS}; anything else prints {@code O}.
 *
 * @param args {@code args[0]} is the path of the tree file, read as UTF-8
 */
public static void main(String[] args) {
    if (args.length < 1) {
        System.err.printf("Usage: java %s tree_file%n", TreeToTSV.class.getName());
        System.exit(-1);
    }
    String treeFile = args[0];
    // try-with-resources on the underlying reader: the file handle is released even when
    // reading a tree fails part-way through.
    try (BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"))) {
        TreeReaderFactory trf = new SpanishTreeReaderFactory();
        TreeReader tr = trf.newTreeReader(br);
        StringBuilder sb = new StringBuilder();
        String nl = System.lineSeparator();
        Pattern nePattern = Pattern.compile("^grup\\.nom\\.");
        Pattern npPattern = Pattern.compile("^np0000.$");
        for (Tree tree; (tree = tr.readTree()) != null; ) {
            for (Tree t : tree) {
                if (!t.isPreTerminal()) {
                    continue;
                }
                char type = 'O';
                // NOTE(review): grandma is dereferenced unchecked - confirm ancestor(1, tree)
                // cannot return null for a preterminal.
                Tree grandma = t.ancestor(1, tree);
                String grandmaValue = ((CoreLabel) grandma.label()).value();
                if (nePattern.matcher(grandmaValue).find()) {
                    // grup.nom.x - the prefix is 9 characters, so index 9 is the x.
                    // NOTE(review): throws if the value is exactly "grup.nom." - confirm
                    // that cannot occur in the input.
                    type = grandmaValue.charAt(9);
                } else {
                    // else check the pos for np0000x or not
                    String pos = ((CoreLabel) t.label()).value();
                    if (npPattern.matcher(pos).find()) {
                        type = pos.charAt(6);
                    }
                }
                Tree wordNode = t.firstChild();
                String word = ((CoreLabel) wordNode.label()).value();
                sb.append(word).append("\t");
                switch (type) {
                    case 'p':
                        sb.append("PERS");
                        break;
                    case 'l':
                        sb.append("LUG");
                        break;
                    case 'o':
                        sb.append("ORG");
                        break;
                    case '0':
                        sb.append("OTROS");
                        break;
                    default:
                        sb.append("O");
                }
                sb.append(nl);
            }
            // Blank line separates the output of consecutive trees.
            sb.append(nl);
        }
        System.out.print(sb.toString());
        tr.close();
    } catch (IOException e) {
        // Also covers FileNotFoundException and UnsupportedEncodingException,
        // which are IOException subclasses.
        e.printStackTrace();
    }
}
Also used : Pattern(java.util.regex.Pattern) InputStreamReader(java.io.InputStreamReader) FileNotFoundException(java.io.FileNotFoundException) TreeReader(edu.stanford.nlp.trees.TreeReader) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException) FileInputStream(java.io.FileInputStream) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) CoreLabel(edu.stanford.nlp.ling.CoreLabel) BufferedReader(java.io.BufferedReader) Tree(edu.stanford.nlp.trees.Tree) SpanishTreeReaderFactory(edu.stanford.nlp.trees.international.spanish.SpanishTreeReaderFactory) TreeReaderFactory(edu.stanford.nlp.trees.TreeReaderFactory)

Aggregations

BufferedReader (java.io.BufferedReader)5548 InputStreamReader (java.io.InputStreamReader)3430 IOException (java.io.IOException)2601 FileReader (java.io.FileReader)1283 File (java.io.File)942 InputStream (java.io.InputStream)845 ArrayList (java.util.ArrayList)766 FileInputStream (java.io.FileInputStream)694 URL (java.net.URL)526 Test (org.junit.Test)447 FileNotFoundException (java.io.FileNotFoundException)380 StringReader (java.io.StringReader)340 BufferedWriter (java.io.BufferedWriter)242 HashMap (java.util.HashMap)232 HttpURLConnection (java.net.HttpURLConnection)231 Matcher (java.util.regex.Matcher)223 OutputStreamWriter (java.io.OutputStreamWriter)212 PrintWriter (java.io.PrintWriter)208 URLConnection (java.net.URLConnection)208 Reader (java.io.Reader)201