Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
From class TestCharacterBasedGenerativeModel, method testSegment.
/**
 * Segments a mixed Chinese/English sample sentence with {@code HMMSegment}
 * and prints the resulting term list to standard output.
 *
 * <p>{@code HanLP.Config.ShowTermNature} is a static, process-wide setting.
 * It is switched off for the duration of this test and restored afterwards
 * so that the change does not leak into other tests running in the same JVM.
 *
 * @throws Exception if segmentation fails
 */
public void testSegment() throws Exception {
    // Remember the global flag so it can be put back once the test is done.
    final boolean previousShowTermNature = HanLP.Config.ShowTermNature;
    HanLP.Config.ShowTermNature = false;
    try {
        String text = "我实现了一个基于Character Based TriGram的分词器";
        Segment segment = new HMMSegment();
        List<Term> termList = segment.seg(text);
        System.out.println(termList);
    } finally {
        HanLP.Config.ShowTermNature = previousShowTermNature;
    }
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
From class TestCustomDictionary, method testRemoveNotNS.
/**
 * data/dictionary/custom/全国地名大全.txt contains many person names; this
 * rewrites the file in place without them.
 *
 * <p>An entry is kept when its last character is one of the characters in
 * {@code Predefine.POSTFIX_SINGLE}. Otherwise the key is segmented with the
 * custom dictionary disabled, and the entry is printed and dropped when the
 * result is exactly one term whose nature is {@code Nature.nr}.
 *
 * @throws Exception if the dictionary cannot be loaded or saved
 */
public void testRemoveNotNS() throws Exception {
    String path = "data/dictionary/custom/全国地名大全.txt";

    // Collect every character of POSTFIX_SINGLE for quick membership checks.
    final Set<Character> keepSuffixes = new TreeSet<Character>();
    char[] suffixChars = Predefine.POSTFIX_SINGLE.toCharArray();
    for (int i = 0; i < suffixChars.length; i++) {
        keepSuffixes.add(suffixChars[i]);
    }

    DictionaryMaker.load(path).saveTxtTo(path, new DictionaryMaker.Filter() {
        // Segmenter with the custom dictionary switched off.
        Segment segment = HanLP.newSegment().enableCustomDictionary(false);

        @Override
        public boolean onSave(Item item) {
            String word = item.key;
            char lastChar = word.charAt(word.length() - 1);
            if (!keepSuffixes.contains(lastChar)) {
                List<Term> terms = segment.seg(word);
                boolean singleNrTerm = terms.size() == 1 && terms.get(0).nature == Nature.nr;
                if (singleNrTerm) {
                    // Log the entry being removed, then tell the maker to skip it.
                    System.out.println(item);
                    return false;
                }
            }
            return true;
        }
    });
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
From class TestPersonRecognition, method testJPName.
/**
 * Calls {@code HanLP.Config.enableDebug()}, then segments a sample sentence
 * with a {@code DijkstraSegment} that has Japanese name recognition enabled
 * and prints the result to standard output.
 *
 * @throws Exception if segmentation fails
 */
public void testJPName() throws Exception {
    HanLP.Config.enableDebug();
    String sentence = "北川景子参演了林诣彬导演";
    Segment japaneseNameSegmenter = new DijkstraSegment().enableJapaneseNameRecognize(true);
    System.out.println(japaneseNameSegmenter.seg(sentence));
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
From class TestPersonRecognition, method testChineseNameRecognition.
/**
 * Calls {@code HanLP.Config.enableDebug()}, then segments a sample sentence
 * with a default {@code DijkstraSegment} and prints the result to standard
 * output.
 *
 * @throws Exception if segmentation fails
 */
public void testChineseNameRecognition() throws Exception {
    HanLP.Config.enableDebug();
    String sentence = "编剧邵钧林和稽道青说";
    System.out.println(new DijkstraSegment().seg(sentence));
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
From class TestSegment, method testCustomDictionary.
/**
 * Inserts the word "肯德基" into {@code CustomDictionary} with the attribute
 * string "ns 1000", then segments that same word with a
 * {@code ViterbiSegment} and prints the result to standard output.
 *
 * @throws Exception if segmentation fails
 */
public void testCustomDictionary() throws Exception {
    CustomDictionary.insert("肯德基", "ns 1000");
    Segment viterbi = new ViterbiSegment();
    Object segmented = viterbi.seg("肯德基");
    System.out.println(segmented);
}
Aggregations