Use of java.io.BufferedReader in the stanfordnlp/CoreNLP project:
the createExtractorFromFiles method of the CoreMapExpressionExtractor class.
/**
* Creates an extractor using the specified environment, and reading the rules from the given filenames.
* @param env
* @param filenames
* @throws RuntimeException
*/
/**
 * Creates an extractor using the specified environment, reading the rules from the given filenames.
 * Files are loaded in order, all contributing rules to the same extractor.
 *
 * @param env the environment the rules are evaluated/bound against
 * @param filenames TokensRegex rule files to read
 * @return an extractor populated with the rules from all of the files
 * @throws RuntimeException if any file cannot be read or parsed (original exception kept as cause)
 */
public static <M extends MatchedExpression> CoreMapExpressionExtractor<M> createExtractorFromFiles(Env env, List<String> filenames) throws RuntimeException {
  CoreMapExpressionExtractor<M> extractor = new CoreMapExpressionExtractor<>(env);
  for (String filename : filenames) {
    // try-with-resources: the original only closed the reader on the success path,
    // leaking it whenever the parser threw.
    try (BufferedReader br = IOUtils.readerFromString(filename)) {
      if (verbose)
        log.info("Reading TokensRegex rules from " + filename);
      TokenSequenceParser parser = new TokenSequenceParser();
      parser.updateExpressionExtractor(extractor, br);
    } catch (Exception ex) {
      throw new RuntimeException("Error parsing file: " + filename, ex);
    }
  }
  return extractor;
}
Use of java.io.BufferedReader in the stanfordnlp/CoreNLP project:
the readPhrases method of the PhraseTable class.
/**
 * Reads phrases from a file, one phrase per line.
 *
 * @param filename file to read phrases from
 * @param checkTag if true, each line is split into at most two fields (phrase, tag)
 *                 using {@code delimiterPattern}; otherwise the whole line is the phrase
 * @param delimiterPattern pattern separating the phrase from its tag (used only when checkTag)
 * @throws IOException if the file cannot be read
 */
public void readPhrases(String filename, boolean checkTag, Pattern delimiterPattern) throws IOException {
  Timing timer = new Timing();
  timer.doing("Reading phrases: " + filename);
  // try-with-resources: the original br.close() was skipped if readLine/addPhrase threw,
  // leaking the underlying file handle.
  try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
    String line;
    while ((line = br.readLine()) != null) {
      if (checkTag) {
        // limit 2: the tag may itself contain the delimiter
        String[] columns = delimiterPattern.split(line, 2);
        if (columns.length == 1) {
          addPhrase(columns[0]);
        } else {
          addPhrase(columns[0], columns[1]);
        }
      } else {
        addPhrase(line);
      }
    }
  }
  timer.done();
}
Use of java.io.BufferedReader in the stanfordnlp/CoreNLP project:
the readPhrasesWithTagScores method of the PhraseTable class.
/**
 * Reads phrases with per-tag scores from a file.  Each line holds the phrase in the
 * first field followed by one field per tag, where each tag field has the form
 * {@code tag<countDelimiter>score}.
 *
 * @param filename file to read from
 * @param fieldDelimiterPattern pattern separating the phrase and the tag/score fields
 * @param countDelimiterPattern pattern separating a tag from its numeric score within a field
 * @throws IOException if the file cannot be read
 * @throws RuntimeException if a tag field is malformed or its score is not a number
 */
public void readPhrasesWithTagScores(String filename, Pattern fieldDelimiterPattern, Pattern countDelimiterPattern) throws IOException {
  Timing timer = new Timing();
  timer.doing("Reading phrases: " + filename);
  // try-with-resources: the original br.close() was skipped on any exception, leaking the reader.
  try (BufferedReader br = IOUtils.getBufferedFileReader(filename)) {
    String line;
    int lineno = 0;  // NOTE(review): line numbers in error messages are 0-based, as in the original
    while ((line = br.readLine()) != null) {
      String[] columns = fieldDelimiterPattern.split(line);
      String phrase = columns[0];
      // Pick map factory to use depending on number of tags we have
      MapFactory<String, MutableDouble> mapFactory = (columns.length < 20)
          ? MapFactory.<String, MutableDouble>arrayMapFactory()
          : MapFactory.<String, MutableDouble>linkedHashMapFactory();
      Counter<String> counts = new ClassicCounter<>(mapFactory);
      for (int i = 1; i < columns.length; i++) {
        // limit 2: the score portion may contain the count delimiter
        String[] tagCount = countDelimiterPattern.split(columns[i], 2);
        if (tagCount.length == 2) {
          try {
            counts.setCount(tagCount[0], Double.parseDouble(tagCount[1]));
          } catch (NumberFormatException ex) {
            throw new RuntimeException("Error processing field " + i + ": '" + columns[i]
                + "' from (" + filename + ":" + lineno + "): " + line, ex);
          }
        } else {
          // message now matches the NumberFormatException branch (removed stray "+ " after "from")
          throw new RuntimeException("Error processing field " + i + ": '" + columns[i]
              + "' from (" + filename + ":" + lineno + "): " + line);
        }
      }
      addPhrase(phrase, null, counts);
      lineno++;
    }
  }
  timer.done();
}
Use of java.io.BufferedReader in the stanfordnlp/CoreNLP project:
the main method of the ConfusionMatrixTSV class.
/**
 * Builds and prints a confusion matrix from a whitespace-separated answers file.
 * Each valid line has three tokens; token 3 is taken as the guess and token 2 as
 * the gold answer (token 1 is ignored).  Malformed lines are skipped with a warning.
 *
 * @param args args[0] is the path to the UTF-8 answers file
 */
public static void main(String[] args) {
  if (args.length < 1) {
    // print this class's name (the original printed ConfusionMatrix, not ConfusionMatrixTSV)
    System.err.printf("Usage: java %s answers_file%n", ConfusionMatrixTSV.class.getName());
    System.exit(-1);
  }
  ConfusionMatrix<String> cm = new ConfusionMatrix<>();
  String answersFile = args[0];
  // try-with-resources: the original never closed the reader
  try (BufferedReader br = new BufferedReader(
      new InputStreamReader(new FileInputStream(answersFile), "UTF-8"))) {
    for (String line = br.readLine(); line != null; line = br.readLine()) {
      String[] tokens = line.split("\\s");
      if (tokens.length != 3) {
        System.err.printf("ignoring bad line%n");  // added missing newline
        continue;
      }
      cm.add(tokens[2], tokens[1]);
    }
    System.out.println(cm.toString());
  } catch (IOException e) {
    // UnsupportedEncodingException and FileNotFoundException are IOExceptions;
    // all three were handled identically, so one catch suffices.
    e.printStackTrace();
  }
}
Use of java.io.BufferedReader in the stanfordnlp/CoreNLP project:
the main method of the TreeToTSV class.
/**
 * Converts a file of AnCora-style Spanish trees into two-column output
 * (word TAB named-entity label), one token per line, with a blank line between trees.
 *
 * @param args args[0] is the path to the UTF-8 tree file
 */
public static void main(String[] args) {
  if (args.length < 1) {
    System.err.printf("Usage: java %s tree_file%n", TreeToTSV.class.getName());
    System.exit(-1);
  }
  String treeFile = args[0];
  // try-with-resources: the original only closed the reader on the success path,
  // leaking it whenever readTree or label processing threw.
  try (BufferedReader br = new BufferedReader(
      new InputStreamReader(new FileInputStream(treeFile), "UTF-8"))) {
    TreeReaderFactory trf = new SpanishTreeReaderFactory();
    TreeReader tr = trf.newTreeReader(br);
    StringBuilder sb = new StringBuilder();
    String nl = System.getProperty("line.separator");
    Pattern nePattern = Pattern.compile("^grup\\.nom\\.");
    Pattern npPattern = Pattern.compile("^np0000.$");
    for (Tree tree; (tree = tr.readTree()) != null; ) {
      for (Tree t : tree) {
        if (!t.isPreTerminal())
          continue;
        char type = 'O';
        Tree grandma = t.ancestor(1, tree);
        String grandmaValue = ((CoreLabel) grandma.label()).value();
        if (nePattern.matcher(grandmaValue).find()) {
          // "grup.nom.x": the entity type is the char after the 9-char prefix "grup.nom."
          type = grandmaValue.charAt(9);
        } else {
          // otherwise check the POS tag for np0000x: the type is the 7th character
          String pos = ((CoreLabel) t.label()).value();
          if (npPattern.matcher(pos).find())
            type = pos.charAt(6);
        }
        Tree wordNode = t.firstChild();
        String word = ((CoreLabel) wordNode.label()).value();
        sb.append(word).append("\t");
        switch (type) {
          case 'p':
            sb.append("PERS");
            break;
          case 'l':
            sb.append("LUG");
            break;
          case 'o':
            sb.append("ORG");
            break;
          case '0':
            sb.append("OTROS");
            break;
          default:
            sb.append("O");
        }
        sb.append(nl);
      }
      sb.append(nl);  // blank line between trees
    }
    System.out.print(sb.toString());
    tr.close();
  } catch (IOException e) {
    // UnsupportedEncodingException and FileNotFoundException are IOExceptions;
    // all three were handled identically, so one catch suffices.
    e.printStackTrace();
  }
}
Aggregations