Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
Class TestSegment, method testIssue71.
/**
 * Segments one sample sentence (the method name ties it to issue 71) and prints
 * the result. The segmenter comes from {@code HanLP.newSegment()} with
 * {@code enableAllNamedEntityRecognize(true)} and
 * {@code enableNumberQuantifierRecognize(true)} applied in that order.
 *
 * @throws Exception declared by the signature
 */
public void testIssue71() throws Exception {
    final String sample = "曾幻想过,若干年后的我就是这个样子的吗";
    // The result of each enable* call is what the next call is invoked on.
    Segment configured = HanLP.newSegment()
            .enableAllNamedEntityRecognize(true)
            .enableNumberQuantifierRecognize(true);
    System.out.println(configured.seg(sample));
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
Class TestSegment, method testSpeedOfSecondViterbi.
/**
 * Prints the segmentation of a short sentence once, then segments the same
 * sentence one million more times and prints the throughput in characters per
 * second.
 *
 * <p>The segmenter is a {@code ViterbiSegment} with all named-entity recognition,
 * name recognition and the custom dictionary switched off.
 *
 * <p>Elapsed time is taken from {@link System#nanoTime()}, which is intended for
 * measuring intervals, rather than from the wall clock.
 *
 * @throws Exception declared by the signature
 */
public void testSpeedOfSecondViterbi() throws Exception {
    String text = "王总和小丽结婚了";
    Segment segment = new ViterbiSegment()
            .enableAllNamedEntityRecognize(false)
            // Person-name recognition needs a second Viterbi pass, which is slow.
            .enableNameRecognize(false)
            .enableCustomDictionary(false);
    System.out.println(segment.seg(text));

    int pressure = 1000000;
    long start = System.nanoTime();
    for (int i = 0; i < pressure; ++i) {
        segment.seg(text);
    }
    // Nanoseconds -> seconds.
    double costTime = (System.nanoTime() - start) / 1e9;
    // Widen before multiplying so a longer text or larger loop count cannot overflow int.
    long totalChars = (long) text.length() * pressure;
    System.out.printf("分词速度:%.2f字每秒", totalChars / costTime);
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
Class DemoChineseNameRecognition, method main.
/**
 * Demo entry point: segments a fixed list of sentences with a segmenter obtained
 * from {@code HanLP.newSegment().enableNameRecognize(true)} and prints each
 * resulting term list on its own line.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    String[] samples = {
        "签约仪式前,秦光荣、李纪恒、仇和等一同会见了参加签约的企业家。",
        "区长庄木弟新年致辞",
        "朱立伦:两岸都希望共创双赢 习朱历史会晤在即",
        "陕西首富吴一坚被带走 与令计划妻子有交集",
        "据美国之音电台网站4月28日报道,8岁的凯瑟琳·克罗尔(凤甫娟)和很多华裔美国小朋友一样,小小年纪就开始学小提琴了。她的妈妈是位虎妈么?",
        "凯瑟琳和露西(庐瑞媛),跟她们的哥哥们有一些不同。",
        "王国强、高峰、汪洋、张朝阳光着头、韩寒、小四",
        "张浩和胡健康复员回家了",
        "王总和小丽结婚了",
        "编剧邵钧林和稽道青说",
        "这里有关天培的有关事迹",
        "龚学平等领导说,邓颖超生前杜绝超生"
    };
    Segment nameAware = HanLP.newSegment().enableNameRecognize(true);
    // Sentences are processed in array order, one printed line per sentence.
    for (int idx = 0; idx < samples.length; idx++) {
        System.out.println(nameAware.seg(samples[idx]));
    }
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
Class DemoHMMSegment, method main.
/**
 * Demo entry point for {@code HMMSegment}: prints the segmentation of a fixed
 * set of sentences, then times 1000 segmentations of one further sentence and
 * prints the throughput in characters per second.
 *
 * <p>Elapsed time is taken from {@link System#nanoTime()}, which is intended for
 * measuring intervals, rather than from the wall clock. With only 1000
 * iterations a millisecond clock is too coarse to rely on.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    // Turn off part-of-speech display in the printed terms.
    HanLP.Config.ShowTermNature = false;
    Segment segment = new HMMSegment();
    String[] sentenceArray = new String[] {
        "HanLP是由一系列模型与算法组成的Java工具包,目标是普及自然语言处理在生产环境中的应用。",
        // Shows some ability to recognise technical terms.
        "高锰酸钾,强氧化剂,紫红色晶体,可溶于水,遇乙醇即被还原。常用作消毒剂、水净化剂、氧化剂、漂白剂、毒气吸收剂、二氧化碳精制剂等。",
        // Non-news text.
        "《夜晚的骰子》通过描述浅草的舞女在暗夜中扔骰子的情景,寄托了作者对庶民生活区的情感",
        // Microblog (Weibo) text.
        "这个像是真的[委屈]前面那个打扮太江户了,一点不上品...@hankcs",
        "鼎泰丰的小笼一点味道也没有...每样都淡淡的...淡淡的,哪有食堂2A的好次",
        "克里斯蒂娜·克罗尔说:不,我不是虎妈。我全家都热爱音乐,我也鼓励他们这么做。",
        "今日APPS:Sago Mini Toolbox培养孩子动手能力",
        "财政部副部长王保安调任国家统计局党组书记",
        "2.34米男子娶1.53米女粉丝 称夫妻生活没问题",
        "你看过穆赫兰道吗",
        "乐视超级手机能否承载贾布斯的生态梦"
    };
    for (String sentence : sentenceArray) {
        List<Term> termList = segment.seg(sentence);
        System.out.println(termList);
    }

    // Quick speed measurement.
    String text = "江西鄱阳湖干枯,中国最大淡水湖变成大草原";
    System.out.println(segment.seg(text));
    int pressure = 1000;
    long start = System.nanoTime();
    for (int i = 0; i < pressure; ++i) {
        segment.seg(text);
    }
    // Nanoseconds -> seconds.
    double costTime = (System.nanoTime() - start) / 1e9;
    // Widen before multiplying so a longer text or larger loop count cannot overflow int.
    long totalChars = (long) text.length() * pressure;
    System.out.printf("HMM2分词速度:%.2f字每秒\n", totalChars / costTime);
}
Use of com.hankcs.hanlp.seg.Segment in project HanLP by hankcs.
Class DemoOrganizationRecognition, method main.
/**
 * Demo entry point: segments three fixed sentences with a segmenter obtained
 * from {@code HanLP.newSegment().enableOrganizationRecognize(true)} and prints
 * each resulting term list on its own line.
 *
 * @param args command-line arguments (not used)
 */
public static void main(String[] args) {
    String[] samples = {
        "我在上海林原科技有限公司兼职工作,",
        "我经常在台川喜宴餐厅吃饭,",
        "偶尔去开元地中海影城看电影。"
    };
    Segment orgAware = HanLP.newSegment().enableOrganizationRecognize(true);
    // Sentences are processed in array order, one printed line per sentence.
    for (int idx = 0; idx < samples.length; idx++) {
        System.out.println(orgAware.seg(samples[idx]));
    }
}
Aggregations