Search in sources :

Example 11 with DictionaryMaker

Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in project HanLP by hankcs.

Class TestDictionaryMaker, method testMakeDictionary.

/**
 * Walks the corpus under {@code data/2014}, hands every visited document to
 * {@code addToDictionary} together with one shared {@link DictionaryMaker},
 * and finally saves that maker to {@code data/2014_dictionary.txt}.
 *
 * @throws Exception declared by the signature; anything thrown by the calls below propagates
 */
public void testMakeDictionary() throws Exception {
    final DictionaryMaker maker = new DictionaryMaker();
    // The handler is invoked by CorpusLoader.walk; it only forwards to addToDictionary.
    final CorpusLoader.Handler collector = new CorpusLoader.Handler() {

        @Override
        public void handle(Document doc) {
            addToDictionary(doc, maker);
        }
    };
    CorpusLoader.walk("data/2014", collector);
    maker.saveTxtTo("data/2014_dictionary.txt");
}
Also used: CorpusLoader (com.hankcs.hanlp.corpus.document.CorpusLoader), DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker), Document (com.hankcs.hanlp.corpus.document.Document)

Example 12 with DictionaryMaker

Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in project HanLP by hankcs.

Class TestDictionaryMaker, method testSingleDocument.

/**
 * Converts one corpus file into a {@link Document}, prints it, passes it to
 * {@code addToDictionary} with a fresh {@link DictionaryMaker}, and saves the
 * maker to {@code data/dictionaryTest.txt}.
 *
 * @throws Exception declared by the signature; anything thrown by the calls below propagates
 */
public void testSingleDocument() throws Exception {
    final File sourceFile = new File("data/2014/0101/c1002-23996898.txt");
    final Document doc = CorpusLoader.convert2Document(sourceFile);
    final DictionaryMaker maker = new DictionaryMaker();
    // Dump the converted document to stdout before it is added to the dictionary.
    System.out.println(doc);
    addToDictionary(doc, maker);
    maker.saveTxtTo("data/dictionaryTest.txt");
}
Also used: DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker), Document (com.hankcs.hanlp.corpus.document.Document), File (java.io.File)

Example 13 with DictionaryMaker

Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in project HanLP by hankcs.

Class TestAdjustCoreDictionary, method testSortCoreNatureDictionary.

/**
 * Loads the dictionary at {@code DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT}
 * into a {@link DictionaryMaker} and saves it straight back to the same path.
 * NOTE(review): judging by the method name, the round trip is expected to
 * rewrite the file in sorted order - confirm against DictionaryMaker.
 *
 * @throws Exception declared by the signature; anything thrown by the calls below propagates
 */
public void testSortCoreNatureDictionary() throws Exception {
    final DictionaryMaker reloaded = DictionaryMaker.load(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT);
    reloaded.saveTxtTo(DATA_DICTIONARY_CORE_NATURE_DICTIONARY_TXT);
}
Also used: DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker)

Example 14 with DictionaryMaker

Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in project HanLP by hankcs.

Class TestAdjustCoreDictionary, method testSimplifyNZ.

/**
 * Walks the corpus directory, collects every word that is a
 * {@link CompoundWord} and whose label equals {@code "nz"} into a
 * {@link DictionaryMaker}, and saves the result to {@code data/test/nz.txt}.
 *
 * @throws Exception declared by the signature; anything thrown by the calls below propagates
 */
public void testSimplifyNZ() throws Exception {
    final DictionaryMaker nzMaker = new DictionaryMaker();
    final CorpusLoader.Handler nzCollector = new CorpusLoader.Handler() {

        @Override
        public void handle(Document doc) {
            for (List<IWord> words : doc.getComplexSentenceList()) {
                for (IWord candidate : words) {
                    // Only compound words are of interest here.
                    if (!(candidate instanceof CompoundWord)) {
                        continue;
                    }
                    // ...and of those, only the ones labelled "nz".
                    if (!"nz".equals(candidate.getLabel())) {
                        continue;
                    }
                    nzMaker.add(candidate);
                }
            }
        }
    };
    CorpusLoader.walk("D:\\Doc\\语料库\\2014", nzCollector);
    nzMaker.saveTxtTo("data/test/nz.txt");
}
Also used: CorpusLoader (com.hankcs.hanlp.corpus.document.CorpusLoader), DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker), List (java.util.List), Document (com.hankcs.hanlp.corpus.document.Document), CompoundWord (com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord), IWord (com.hankcs.hanlp.corpus.document.sentence.word.IWord)

Example 15 with DictionaryMaker

Use of com.hankcs.hanlp.corpus.dictionary.DictionaryMaker in project HanLP by hankcs.

Class TestNTRecognition, method testRemoveP.

/**
 * Loads the dictionary at {@code HanLP.Config.OrganizationDictionaryPath} and,
 * for every entry whose item carries the label {@code "P"} while the word's
 * attribute from {@code LexiconUtility} has a nature starting with {@code "u"},
 * prints the entry and removes the {@code "P"} label. Words for which no
 * attribute is found are left untouched. The dictionary is then saved back to
 * the same path.
 *
 * @throws Exception declared by the signature; anything thrown by the calls below propagates
 */
public void testRemoveP() throws Exception {
    final DictionaryMaker orgMaker = DictionaryMaker.load(HanLP.Config.OrganizationDictionaryPath);
    for (Map.Entry<String, Item> e : orgMaker.entrySet()) {
        final String key = e.getKey();
        final Item entryItem = e.getValue();
        final CoreDictionary.Attribute coreAttribute = LexiconUtility.getAttribute(key);
        if (coreAttribute != null) {
            // Label check first, attribute check second: same short-circuit order as before.
            final boolean stripLabel = entryItem.containsLabel("P") && coreAttribute.hasNatureStartsWith("u");
            if (stripLabel) {
                System.out.println(entryItem + "\t" + coreAttribute);
                entryItem.removeLabel("P");
            }
        }
    }
    orgMaker.saveTxtTo(HanLP.Config.OrganizationDictionaryPath);
}
Also used: Item (com.hankcs.hanlp.corpus.dictionary.item.Item), DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker), CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary), Map (java.util.Map)

Aggregations

DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker): 17; Item (com.hankcs.hanlp.corpus.dictionary.item.Item): 7; Document (com.hankcs.hanlp.corpus.document.Document): 5; CorpusLoader (com.hankcs.hanlp.corpus.document.CorpusLoader): 4; IWord (com.hankcs.hanlp.corpus.document.sentence.word.IWord): 3; List (java.util.List): 3; CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence): 2; CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord): 2; Word (com.hankcs.hanlp.corpus.document.sentence.word.Word): 2; BufferedReader (java.io.BufferedReader): 2; FileInputStream (java.io.FileInputStream): 2; InputStreamReader (java.io.InputStreamReader): 2; Map (java.util.Map): 2; CompoundWord (com.hankcs.hanlp.corpus.document.sentence.word.CompoundWord): 1; CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary): 1; File (java.io.File): 1; TreeSet (java.util.TreeSet): 1; Matcher (java.util.regex.Matcher): 1; Pattern (java.util.regex.Pattern): 1