Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs:
class TestXianDaiHanYu, method testSeg.
/**
 * Segments a short Chinese sentence with the N-shortest-path segmenter,
 * with person-name recognition on and debug logging enabled.
 */
public void testSeg() throws Exception {
    NShortSegment nShortSegment = new NShortSegment();
    Segment segment = nShortSegment.enableNameRecognize(true);
    HanLP.Config.enableDebug(true);
    List<Term> termList = segment.seg("我在区人保工作");
    System.out.println(termList);
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs:
class TestSegment, method testJP.
/**
 * Segments mixed Chinese/ASCII text with the Viterbi segmenter,
 * with the custom dictionary and all named-entity recognition disabled.
 */
public void testJP() throws Exception {
    Segment viterbi = new ViterbiSegment();
    viterbi = viterbi.enableCustomDictionary(false);
    viterbi = viterbi.enableAllNamedEntityRecognize(false);
    String input = "明天8.9你好abc对了";
    System.out.println(viterbi.seg(input));
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs:
class TestSegment, method testIssue290.
/**
 * Regression check for issue #290: Viterbi segmentation with POS tagging,
 * offsets, and person/place/organization/number-quantifier recognition enabled.
 */
public void testIssue290() throws Exception {
    // HanLP.Config.enableDebug();
    String txt = "而其他肢解出去的七个贝尔公司如西南贝尔、太平洋贝尔、大西洋贝尔。";
    Segment viterbi = new ViterbiSegment()
            .enablePartOfSpeechTagging(true)
            .enableOffset(true)
            .enableNameRecognize(true)
            .enablePlaceRecognize(true)
            .enableOrganizationRecognize(true)
            .enableNumberQuantifierRecognize(true);
    System.out.println(viterbi.seg(txt));
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs:
class TestSegment, method testIssue358.
/**
 * Regression check for issue #358: the standard tokenizer with all NER and the
 * custom dictionary switched off, then organization recognition re-enabled.
 */
public void testIssue358() throws Exception {
    HanLP.Config.enableDebug();
    Segment standard = StandardTokenizer.SEGMENT;
    standard = standard.enableAllNamedEntityRecognize(false);
    standard = standard.enableCustomDictionary(false);
    standard = standard.enableOrganizationRecognize(true);
    String text = "受约束,需要遵守心理学会所定的道德原则,所需要时须说明该实验与所能得到的知识的关系";
    System.out.println(standard.seg(text));
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs:
class TestSegment, method testMultiThreading.
/**
 * Verifies that multithreaded segmentation yields exactly the same term
 * sequence (word, nature, offset) as single-threaded segmentation, and
 * prints a rough throughput comparison for both runs.
 */
public void testMultiThreading() throws Exception {
    Segment segment = BasicTokenizer.SEGMENT;
    // 测个速度 — warm up and show a sample segmentation first.
    String sentence = "江西鄱阳湖干枯,中国最大淡水湖变成大草原。";
    System.out.println(segment.seg(sentence));
    // Build a large corpus by repeating the sentence 100k times.
    int pressure = 100000;
    StringBuilder sbBigText = new StringBuilder(sentence.length() * pressure);
    for (int i = 0; i < pressure; i++) {
        sbBigText.append(sentence);
    }
    String text = sbBigText.toString();
    // Use the monotonic System.nanoTime() for elapsed time: currentTimeMillis()
    // can jump with wall-clock adjustments, and its ms granularity could yield
    // costTime == 0.0 and an Infinity throughput.
    long start = System.nanoTime();
    List<Term> termList1 = segment.seg(text);
    double costTime = (System.nanoTime() - start) / 1e9;
    // %n is the platform-independent line separator for printf.
    System.out.printf("单线程分词速度:%.2f字每秒%n", text.length() / costTime);
    segment.enableMultithreading(4);
    start = System.nanoTime();
    List<Term> termList2 = segment.seg(text);
    costTime = (System.nanoTime() - start) / 1e9;
    System.out.printf("四线程分词速度:%.2f字每秒%n", text.length() / costTime);
    // Both runs must produce identical segmentations, term by term.
    assertEquals(termList1.size(), termList2.size());
    Iterator<Term> iterator1 = termList1.iterator();
    Iterator<Term> iterator2 = termList2.iterator();
    while (iterator1.hasNext()) {
        Term term1 = iterator1.next();
        Term term2 = iterator2.next();
        assertEquals(term1.word, term2.word);
        assertEquals(term1.nature, term2.nature);
        assertEquals(term1.offset, term2.offset);
    }
}
Aggregations