use of edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory in project CoreNLP by stanfordnlp.
the class FTBCorrector method main.
/**
* @param args
*/
public static void main(String[] args) {
if (args.length != 1) {
log.info("Usage: java " + FTBCorrector.class.getName() + " filename\n");
System.exit(-1);
}
TreeTransformer tt = new FTBCorrector();
File f = new File(args[0]);
try {
//These bad trees in the Candito training set should be thrown out:
// (ROOT (SENT (" ") (. .)))
// (ROOT (SENT (. .)))
TregexPattern pBadTree = TregexPattern.compile("@SENT <: @PUNC");
TregexPattern pBadTree2 = TregexPattern.compile("@SENT <1 @PUNC <2 @PUNC !<3 __");
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(f), "UTF-8"));
TreeReaderFactory trf = new FrenchTreeReaderFactory();
TreeReader tr = trf.newTreeReader(br);
int nTrees = 0;
for (Tree t; (t = tr.readTree()) != null; nTrees++) {
TregexMatcher m = pBadTree.matcher(t);
TregexMatcher m2 = pBadTree2.matcher(t);
if (m.find() || m2.find()) {
log.info("Discarding tree: " + t.toString());
} else {
Tree fixedT = tt.transformTree(t);
System.out.println(fixedT.toString());
}
}
tr.close();
System.err.printf("Wrote %d trees%n", nTrees);
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} catch (TregexParseException e) {
e.printStackTrace();
}
}
use of edu.stanford.nlp.trees.international.french.FrenchTreeReaderFactory in project CoreNLP by stanfordnlp.
the class MWEPreprocessor method resolveDummyTags.
private static void resolveDummyTags(File treeFile, TwoDimensionalCounter<String, String> pretermLabel, TwoDimensionalCounter<String, String> unigramTagger) {
try {
BufferedReader br = new BufferedReader(new InputStreamReader(new FileInputStream(treeFile), "UTF-8"));
TreeReaderFactory trf = new FrenchTreeReaderFactory();
TreeReader tr = trf.newTreeReader(br);
PrintWriter pw = new PrintWriter(new PrintStream(new FileOutputStream(new File(treeFile + ".fixed")), false, "UTF-8"));
int nTrees = 0;
for (Tree t; (t = tr.readTree()) != null; nTrees++) {
traverseAndFix(t, pretermLabel, unigramTagger);
pw.println(t.toString());
}
pw.close();
tr.close();
System.out.println("Processed " + nTrees + " trees");
} catch (UnsupportedEncodingException e) {
e.printStackTrace();
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
Aggregations