Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the HanLP project by hankcs.
From the class TestDictionaryMaker, method testMakeDictionary.
/**
 * Builds a dictionary from the corpus directory {@code data/2014}.
 *
 * <p>Each {@code Document} that {@code CorpusLoader.walk} passes to the handler is
 * forwarded to {@code addToDictionary} together with a single shared
 * {@code DictionaryMaker}; once the walk returns, the maker is written to
 * {@code data/2014_dictionary.txt}.
 *
 * @throws Exception propagated from the corpus walk or from saving the dictionary
 */
public void testMakeDictionary() throws Exception {
    // Must be final: it is captured by the anonymous handler below.
    final DictionaryMaker maker = new DictionaryMaker();
    CorpusLoader.Handler collector = new CorpusLoader.Handler() {
        @Override
        public void handle(Document doc) {
            addToDictionary(doc, maker);
        }
    };
    CorpusLoader.walk("data/2014", collector);
    maker.saveTxtTo("data/2014_dictionary.txt");
}
Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the HanLP project by hankcs.
From the class TestDictionaryMaker, method testSingleDocument.
/**
 * Runs the dictionary-building step on one corpus file.
 *
 * <p>Converts {@code data/2014/0101/c1002-23996898.txt} to a {@code Document},
 * prints it to standard output, passes it to {@code addToDictionary} with a fresh
 * {@code DictionaryMaker}, and saves the result to {@code data/dictionaryTest.txt}.
 *
 * @throws Exception propagated from document conversion or from saving the dictionary
 */
public void testSingleDocument() throws Exception {
    File corpusFile = new File("data/2014/0101/c1002-23996898.txt");
    Document doc = CorpusLoader.convert2Document(corpusFile);
    DictionaryMaker maker = new DictionaryMaker();
    // Dump the parsed document so the input can be inspected by eye.
    System.out.println(doc);
    addToDictionary(doc, maker);
    maker.saveTxtTo("data/dictionaryTest.txt");
}
Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the HanLP project by hankcs.
From the class TestAdjustCoreDictionary, method testSortCoreNatureDictionary.
/**
 * Loads the dictionary at {@code DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT} and
 * immediately writes it back to the same path, overwriting the file in place.
 *
 * <p>NOTE(review): the method name suggests this load/save round trip is what
 * sorts the entries; that depends on {@code DictionaryMaker} internals not
 * visible here - confirm.
 *
 * @throws Exception propagated from loading or saving the dictionary
 */
public void testSortCoreNatureDictionary() throws Exception {
    DictionaryMaker.load(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT)
            .saveTxtTo(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT);
}
Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the HanLP project by hankcs.
From the class TestAdjustCoreDictionary, method testSimplifyNZ.
/**
 * Collects compound words labelled {@code "nz"} from a corpus into a dictionary.
 *
 * <p>For every {@code Document} handed to the handler, each word of each sentence
 * returned by {@code getComplexSentenceList()} is added to the dictionary when it
 * is a {@code CompoundWord} whose label equals {@code "nz"}. The dictionary is
 * then saved to {@code data/test/nz.txt}.
 *
 * <p>NOTE(review): the corpus root is an absolute Windows path, so this only runs
 * on a machine that has that directory.
 *
 * @throws Exception propagated from the corpus walk or from saving the dictionary
 */
public void testSimplifyNZ() throws Exception {
    // Must be final: it is captured by the anonymous handler below.
    final DictionaryMaker nzMaker = new DictionaryMaker();
    CorpusLoader.Handler nzCollector = new CorpusLoader.Handler() {
        @Override
        public void handle(Document doc) {
            for (List<IWord> words : doc.getComplexSentenceList()) {
                for (IWord candidate : words) {
                    // Only compound words are of interest.
                    if (!(candidate instanceof CompoundWord)) {
                        continue;
                    }
                    // ...and of those, only the ones labelled "nz".
                    if (!"nz".equals(candidate.getLabel())) {
                        continue;
                    }
                    nzMaker.add(candidate);
                }
            }
        }
    };
    CorpusLoader.walk("D:\\Doc\\语料库\\2014", nzCollector);
    nzMaker.saveTxtTo("data/test/nz.txt");
}
Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in the HanLP project by hankcs.
From the class TestNTRecognition, method testRemoveP.
/**
 * Strips the {@code "P"} label from selected entries of the organization dictionary.
 *
 * <p>Loads the dictionary at {@code HanLP.Config.OrganizationDictionaryPath} and
 * visits every entry. Entries whose word has no attribute according to
 * {@code LexiconUtility.getAttribute} are left untouched. For the rest, when the
 * item contains label {@code "P"} and {@code attribute.hasNatureStartsWith("u")}
 * is true, the item and attribute are printed (tab-separated) and the {@code "P"}
 * label is removed. The dictionary is finally saved back to the same path.
 *
 * @throws Exception propagated from loading or saving the dictionary
 */
public void testRemoveP() throws Exception {
    DictionaryMaker organizationMaker = DictionaryMaker.load(HanLP.Config.OrganizationDictionaryPath);
    for (Map.Entry<String, Item> each : organizationMaker.entrySet()) {
        String key = each.getKey();
        Item labelled = each.getValue();
        CoreDictionary.Attribute coreAttribute = LexiconUtility.getAttribute(key);
        // Short-circuit keeps the original order: null check, then label, then nature.
        if (coreAttribute != null
                && labelled.containsLabel("P")
                && coreAttribute.hasNatureStartsWith("u")) {
            System.out.println(labelled + "\t" + coreAttribute);
            labelled.removeLabel("P");
        }
    }
    organizationMaker.saveTxtTo(HanLP.Config.OrganizationDictionaryPath);
}
Aggregations