Use of com.hankcs.hanlp.corpus.document.sentence.word.IWord in project HanLP by hankcs:
class AdjustCorpus, method testPlay.
/**
 * Walks the corpus directory and records every compound word labelled {@code "ns"}
 * in a {@link TFDictionary}, then writes that dictionary to
 * {@code data/test/complex_ns.txt}.
 *
 * <p>NOTE(review): the corpus location is a hard-coded absolute Windows path, so this
 * only runs on a machine that has the corpus at that location.
 *
 * @throws Exception if walking the corpus or saving the dictionary fails
 */
public void testPlay() throws Exception {
    final TFDictionary tfDictionary = new TFDictionary();
    CorpusLoader.walk("D:\\JavaProjects\\CorpusToolBox\\data\\2014", new CorpusLoader.Handler() {
        @Override
        public void handle(Document document) {
            for (List<IWord> wordList : document.getComplexSentenceList()) {
                for (IWord word : wordList) {
                    // Constant-first equals: a word whose label is null is skipped
                    // instead of aborting the whole walk with a NullPointerException.
                    if (word instanceof CompoundWord && "ns".equals(word.getLabel())) {
                        // The dictionary is keyed by the word's string form.
                        tfDictionary.add(word.toString());
                    }
                }
            }
        }
    });
    tfDictionary.saveTxtTo("data/test/complex_ns.txt");
}
Use of com.hankcs.hanlp.corpus.document.sentence.word.IWord in project HanLP by hankcs:
class TestAdjustCoreDictionary, method testSimplifyNZ.
/**
 * Gathers every compound word labelled {@code "nz"} from the corpus into a
 * {@link DictionaryMaker} and saves the result to {@code data/test/nz.txt}.
 *
 * @throws Exception if walking the corpus or saving the dictionary fails
 */
public void testSimplifyNZ() throws Exception {
    final DictionaryMaker nzDictionary = new DictionaryMaker();

    // Handler invoked for each corpus document during the walk.
    final CorpusLoader.Handler nzCollector = new CorpusLoader.Handler() {
        @Override
        public void handle(Document document) {
            for (List<IWord> words : document.getComplexSentenceList()) {
                for (IWord candidate : words) {
                    // Only compound words are of interest; simple words are skipped
                    // before their label is ever inspected.
                    if (!(candidate instanceof CompoundWord)) {
                        continue;
                    }
                    if (!"nz".equals(candidate.getLabel())) {
                        continue;
                    }
                    nzDictionary.add(candidate);
                }
            }
        }
    };

    CorpusLoader.walk("D:\\Doc\\语料库\\2014", nzCollector);
    nzDictionary.saveTxtTo("data/test/nz.txt");
}
Aggregations