Search in sources :

Example 16 with Segment

use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.

the class TestCharacterBasedGenerativeModel method testSegment.

/**
 * Runs the HMM-based segmenter over a mixed Chinese/English sentence and
 * prints the resulting term list to standard output.
 *
 * <p>{@code HanLP.Config.ShowTermNature} is switched off before segmenting;
 * this is a global setting and is not restored afterwards.
 *
 * @throws Exception if segmentation fails
 */
public void testSegment() throws Exception {
    HanLP.Config.ShowTermNature = false;
    final Segment hmmSegmenter = new HMMSegment();
    final List<Term> terms = hmmSegmenter.seg("我实现了一个基于Character Based TriGram的分词器");
    System.out.println(terms);
}
Also used : Term(com.hankcs.hanlp.seg.common.Term) Segment(com.hankcs.hanlp.seg.Segment) HMMSegment(com.hankcs.hanlp.seg.HMM.HMMSegment) CharacterBasedGenerativeModelSegment(com.hankcs.hanlp.seg.CharacterBasedGenerativeModelSegment) HMMSegment(com.hankcs.hanlp.seg.HMM.HMMSegment)

Example 17 with Segment

use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.

the class TestCustomDictionary method testRemoveNotNS.

/**
 * data/dictionary/custom/全国地名大全.txt contains many person names; this
 * rewrites the file in place with those entries filtered out.
 *
 * <p>An entry is kept when its last character is one of the characters in
 * {@code Predefine.POSTFIX_SINGLE}. Otherwise the key is segmented (with the
 * custom dictionary disabled) and the entry is rejected, and printed, when the
 * result is a single term whose nature is {@code Nature.nr}.
 *
 * @throws Exception if the dictionary cannot be loaded or saved
 */
public void testRemoveNotNS() throws Exception {
    String path = "data/dictionary/custom/全国地名大全.txt";
    // Collect every single-character suffix for quick membership checks.
    final Set<Character> keepSuffixes = new TreeSet<Character>();
    char[] suffixChars = Predefine.POSTFIX_SINGLE.toCharArray();
    for (int i = 0; i < suffixChars.length; i++) {
        keepSuffixes.add(suffixChars[i]);
    }
    DictionaryMaker.load(path).saveTxtTo(path, new DictionaryMaker.Filter() {

        // Custom dictionary is disabled so the file under cleanup does not
        // influence how its own entries are segmented.
        Segment segmenter = HanLP.newSegment().enableCustomDictionary(false);

        @Override
        public boolean onSave(Item item) {
            char lastChar = item.key.charAt(item.key.length() - 1);
            if (!keepSuffixes.contains(lastChar)) {
                List<Term> terms = segmenter.seg(item.key);
                boolean singlePersonName = terms.size() == 1 && terms.get(0).nature == Nature.nr;
                if (singlePersonName) {
                    // Report the entry that is being rejected.
                    System.out.println(item);
                    return false;
                }
            }
            return true;
        }
    });
}
Also used : Item(com.hankcs.hanlp.corpus.dictionary.item.Item) DictionaryMaker(com.hankcs.hanlp.corpus.dictionary.DictionaryMaker) Segment(com.hankcs.hanlp.seg.Segment)

Example 18 with Segment

use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.

the class TestPersonRecognition method testJPName.

/**
 * Segments a sentence containing a Japanese personal name with a
 * {@code DijkstraSegment} that has Japanese name recognition enabled, and
 * prints the result. HanLP debug output is enabled first.
 *
 * @throws Exception if segmentation fails
 */
public void testJPName() throws Exception {
    HanLP.Config.enableDebug();
    final String sentence = "北川景子参演了林诣彬导演";
    Segment japaneseNameSegmenter = new DijkstraSegment().enableJapaneseNameRecognize(true);
    System.out.println(japaneseNameSegmenter.seg(sentence));
}
Also used : DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) NShortSegment(com.hankcs.hanlp.seg.NShort.NShortSegment) Segment(com.hankcs.hanlp.seg.Segment) DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment)

Example 19 with Segment

use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.

the class TestPersonRecognition method testChineseNameRecognition.

/**
 * Segments a sentence containing Chinese personal names with a default
 * {@code DijkstraSegment} and prints the result. HanLP debug output is
 * enabled first.
 *
 * @throws Exception if segmentation fails
 */
public void testChineseNameRecognition() throws Exception {
    HanLP.Config.enableDebug();
    final String sentence = "编剧邵钧林和稽道青说";
    Segment dijkstraSegmenter = new DijkstraSegment();
    System.out.println(dijkstraSegmenter.seg(sentence));
}
Also used : DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) NShortSegment(com.hankcs.hanlp.seg.NShort.NShortSegment) Segment(com.hankcs.hanlp.seg.Segment) DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment)

Example 20 with Segment

use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.

the class TestSegment method testCustomDictionary.

/**
 * Inserts a word into the custom dictionary with the attribute string
 * {@code "ns 1000"}, then segments that same word with a
 * {@code ViterbiSegment} and prints the result.
 *
 * @throws Exception if insertion or segmentation fails
 */
public void testCustomDictionary() throws Exception {
    final String word = "肯德基";
    CustomDictionary.insert(word, "ns 1000");
    Segment viterbiSegmenter = new ViterbiSegment();
    System.out.println(viterbiSegmenter.seg(word));
}
Also used : ViterbiSegment(com.hankcs.hanlp.seg.Viterbi.ViterbiSegment) Segment(com.hankcs.hanlp.seg.Segment) DoubleArrayTrieSegment(com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment) CRFSegment(com.hankcs.hanlp.seg.CRF.CRFSegment) DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) ViterbiSegment(com.hankcs.hanlp.seg.Viterbi.ViterbiSegment)

Aggregations

Segment (com.hankcs.hanlp.seg.Segment): 33; CRFSegment (com.hankcs.hanlp.seg.CRF.CRFSegment): 20; DijkstraSegment (com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment): 20; ViterbiSegment (com.hankcs.hanlp.seg.Viterbi.ViterbiSegment): 19; DoubleArrayTrieSegment (com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment): 18; Term (com.hankcs.hanlp.seg.common.Term): 12; NShortSegment (com.hankcs.hanlp.seg.NShort.NShortSegment): 4; ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm): 4; HMMSegment (com.hankcs.hanlp.seg.HMM.HMMSegment): 2; DictionaryMaker (com.hankcs.hanlp.corpus.dictionary.DictionaryMaker): 1; Item (com.hankcs.hanlp.corpus.dictionary.item.Item): 1; CharacterBasedGenerativeModelSegment (com.hankcs.hanlp.seg.CharacterBasedGenerativeModelSegment): 1