Use of com.hankcs.hanlp.corpus.document.Document in project HanLP by hankcs.
From the class TestMakeCompanyCorpus, method testParse.
/**
 * Walks the corpus under {@code data/test/nt/part/}, passes each document's
 * complex sentence list to an {@link NTDictionaryMaker}, and then saves the
 * maker's contents as text.
 *
 * <p>The maker is backed by an {@link EasyDictionary} created from
 * {@code data/dictionary/2014_dictionary.txt}.
 *
 * @throws Exception if any of the dictionary or corpus calls fails
 */
public void testParse() throws Exception {
    final NTDictionaryMaker ntMaker =
            new NTDictionaryMaker(EasyDictionary.create("data/dictionary/2014_dictionary.txt"));

    // Alternative corpus root that was walked previously:
    // "D:\\JavaProjects\\CorpusToolBox\\data\\2014\\"
    final CorpusLoader.Handler feedMaker = new CorpusLoader.Handler() {
        @Override
        public void handle(Document document) {
            ntMaker.compute(document.getComplexSentenceList());
        }
    };
    CorpusLoader.walk("data/test/nt/part/", feedMaker);

    // NOTE(review): absolute, machine-specific output location.
    ntMaker.saveTxtTo("D:\\JavaProjects\\HanLP\\data\\dictionary\\organization\\outerNT");
}
Use of com.hankcs.hanlp.corpus.document.Document in project HanLP by hankcs.
From the class TestDictionaryMaker, method testMakeDictionary.
/**
 * Walks every document under {@code data/2014}, hands each one to
 * {@code addToDictionary} together with a shared {@link DictionaryMaker},
 * and finally saves that maker to {@code data/2014_dictionary.txt}.
 *
 * @throws Exception if walking the corpus or saving the dictionary fails
 */
public void testMakeDictionary() throws Exception {
    final DictionaryMaker maker = new DictionaryMaker();

    // One handler instance is reused for every document the walk visits.
    final CorpusLoader.Handler collector = new CorpusLoader.Handler() {
        @Override
        public void handle(Document document) {
            addToDictionary(document, maker);
        }
    };

    CorpusLoader.walk("data/2014", collector);
    maker.saveTxtTo("data/2014_dictionary.txt");
}
Use of com.hankcs.hanlp.corpus.document.Document in project HanLP by hankcs.
From the class TestDictionaryMaker, method testSingleDocument.
/**
 * Converts a single corpus file into a {@link Document}, prints it to
 * standard output, adds it to a fresh {@link DictionaryMaker} via
 * {@code addToDictionary}, and saves the result to
 * {@code data/dictionaryTest.txt}.
 *
 * @throws Exception if conversion or saving fails
 */
public void testSingleDocument() throws Exception {
    final File sample = new File("data/2014/0101/c1002-23996898.txt");
    final Document parsed = CorpusLoader.convert2Document(sample);
    final DictionaryMaker maker = new DictionaryMaker();

    // Dump the parsed document so it can be inspected by eye.
    System.out.println(parsed);

    addToDictionary(parsed, maker);
    maker.saveTxtTo("data/dictionaryTest.txt");
}
Use of com.hankcs.hanlp.corpus.document.Document in project HanLP by hankcs.
From the class TestAdjustCoreDictionary, method testSimplifyNZ.
/**
 * Walks the corpus at {@code D:\\Doc\\语料库\\2014} and collects every word
 * that is a {@link CompoundWord} whose label equals {@code "nz"} into a
 * {@link DictionaryMaker}, which is then saved to {@code data/test/nz.txt}.
 *
 * @throws Exception if walking the corpus or saving the dictionary fails
 */
public void testSimplifyNZ() throws Exception {
    final DictionaryMaker nzMaker = new DictionaryMaker();

    final CorpusLoader.Handler nzCollector = new CorpusLoader.Handler() {
        @Override
        public void handle(Document document) {
            for (List<IWord> words : document.getComplexSentenceList()) {
                for (IWord candidate : words) {
                    // Only compound words are of interest here.
                    if (!(candidate instanceof CompoundWord)) {
                        continue;
                    }
                    // ...and of those, only the ones labelled "nz".
                    if (!"nz".equals(candidate.getLabel())) {
                        continue;
                    }
                    nzMaker.add(candidate);
                }
            }
        }
    };

    // NOTE(review): absolute, machine-specific corpus location.
    CorpusLoader.walk("D:\\Doc\\语料库\\2014", nzCollector);
    nzMaker.saveTxtTo("data/test/nz.txt");
}
Aggregations