Use of edu.stanford.nlp.trees.international.spanish.SpanishXMLTreeReaderFactory in project CoreNLP by stanfordnlp.
The method process of the class AnCoraPOSStats.
/**
 * Reads every file in {@code fileList} as a sequence of trees and records
 * each leaf's (word, tag) pair in {@code unigramTagger}.
 *
 * <p>Leaves whose tag equals {@code SpanishTreeNormalizer.MW_TAG} are
 * skipped and do not contribute to the counts.
 *
 * <p>Each file's reader is closed before the next file is opened, including
 * when reading fails part-way through a file.
 *
 * @throws IOException if a file cannot be opened, decoded with
 *         {@code ANCORA_ENCODING}, or read
 */
public void process() throws IOException {
  SpanishXMLTreeReaderFactory trf = new SpanishXMLTreeReaderFactory();

  for (File file : fileList) {
    // Both resources are declared separately so the underlying reader is
    // still closed if constructing the tree reader itself fails.
    try (Reader in = new BufferedReader(
             new InputStreamReader(new FileInputStream(file), ANCORA_ENCODING));
         TreeReader tr = trf.newTreeReader(in)) {
      // Tree reading will implicitly perform tree normalization for us
      Tree t;
      while ((t = tr.readTree()) != null) {
        // Update tagger with this tree
        List<CoreLabel> yield = t.taggedLabeledYield();
        for (CoreLabel leafLabel : yield) {
          if (leafLabel.tag().equals(SpanishTreeNormalizer.MW_TAG)) {
            continue;
          }
          unigramTagger.incrementCount(leafLabel.word(), leafLabel.tag());
        }
      }
    }
  }
}
Aggregations