Usage example of com.hankcs.hanlp.seg.common.Term in the HanLP project by hankcs:
class TestParse, method testEvaluate.
/**
 * Evaluates the CRF dependency parser against the THU dev.conll gold corpus,
 * printing per-sentence progress/timing and the final evaluator summary.
 *
 * NOTE(review): depends on a machine-specific Windows corpus path — confirm
 * the corpus location before running.
 */
public void testEvaluate() throws Exception {
    testParse();
    LinkedList<CoNLLSentence> sentenceList = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll");
    Evaluator evaluator = new Evaluator();
    int index = 0;
    int total = sentenceList.size();
    for (CoNLLSentence gold : sentenceList) {
        ++index;
        System.out.printf("%d / %d...", index, total);
        long begin = System.currentTimeMillis();
        // Rebuild the pre-tagged term list from the gold-standard tokens.
        List<Term> terms = new LinkedList<Term>();
        for (CoNLLWord w : gold.word) {
            terms.add(new Term(w.LEMMA, Nature.valueOf(w.POSTAG)));
        }
        CoNLLSentence predicted = CRFDependencyParser.compute(terms);
        evaluator.e(gold, predicted);
        long elapsed = System.currentTimeMillis() - begin;
        System.out.println("done in " + elapsed + " ms.");
    }
    System.out.println(evaluator);
}
Usage example of com.hankcs.hanlp.seg.common.Term in the HanLP project by hankcs:
class TestParse, method testCrfParser.
/**
 * Runs the CRF dependency parser on a hand-built, pre-tagged sentence
 * (with debug logging enabled) and prints the resulting dependency tree.
 */
public void testCrfParser() throws Exception {
    HanLP.Config.enableDebug();
    // Words and their part-of-speech tags kept in parallel arrays.
    String[] words = {"坚决", "惩治", "贪污", "贿赂", "等", "经济", "犯罪"};
    Nature[] tags = {Nature.ad, Nature.v, Nature.v, Nature.n, Nature.udeng, Nature.n, Nature.vn};
    List<Term> termList = new LinkedList<Term>();
    for (int i = 0; i < words.length; ++i) {
        termList.add(new Term(words[i], tags[i]));
    }
    System.out.println(CRFDependencyParser.compute(termList));
}
Usage example of com.hankcs.hanlp.seg.common.Term in the HanLP project by hankcs:
class TestDijkstra, method testSeg.
/**
 * Smoke test: segments a short phrase with the Dijkstra-based segmenter
 * and prints the resulting term list.
 */
public void testSeg() throws Exception {
    DijkstraSegment segment = new DijkstraSegment();
    List<Term> terms = segment.seg("商品与服务");
    System.out.println(terms);
}
Usage example of com.hankcs.hanlp.seg.common.Term in the HanLP project by hankcs:
class TestPersonRecognition, method testBatch.
/**
 * Walks every file under FOLDER, segments each file's content into sentences,
 * and prints every sentence containing a person name (nature {@code nr}).
 *
 * @throws Exception if walking the folder or reading a file fails
 */
public void testBatch() throws Exception {
    List<File> fileList = FolderWalker.open(FOLDER);
    // Fix: construct the segmenter once, not once per file — building a
    // DijkstraSegment inside the loop repeated its (expensive) setup for
    // every file with no behavioral benefit.
    DijkstraSegment segment = new DijkstraSegment();
    int i = 0;
    for (File file : fileList) {
        System.out.println(++i + " / " + fileList.size() + " " + file.getName() + " ");
        String content = IOUtil.readTxt(file.getAbsolutePath());
        List<List<Term>> sentenceList = segment.seg2sentence(content);
        for (List<Term> sentence : sentenceList) {
            if (SentencesUtil.hasNature(sentence, Nature.nr)) {
                System.out.println(sentence);
            }
        }
    }
}
Usage example of com.hankcs.hanlp.seg.common.Term in the HanLP project by hankcs:
class TestSegment, method testMultiThreading.
/**
 * Benchmarks single-threaded vs. four-thread segmentation on a large text
 * built by repeating one sample sentence, then verifies both runs produce
 * identical term lists (same word, nature, and offset for every term).
 */
public void testMultiThreading() throws Exception {
    Segment segment = BasicTokenizer.SEGMENT;
    // Measure segmentation speed.
    String text = "江西鄱阳湖干枯,中国最大淡水湖变成大草原。";
    System.out.println(segment.seg(text));

    // Inflate the input by repeating the sentence `pressure` times.
    final int pressure = 100000;
    StringBuilder builder = new StringBuilder(text.length() * pressure);
    for (int round = pressure; round > 0; --round) {
        builder.append(text);
    }
    text = builder.toString();

    long begin = System.currentTimeMillis();
    List<Term> singleThreaded = segment.seg(text);
    double seconds = (System.currentTimeMillis() - begin) / (double) 1000;
    System.out.printf("单线程分词速度:%.2f字每秒\n", text.length() / seconds);

    segment.enableMultithreading(4);
    begin = System.currentTimeMillis();
    List<Term> multiThreaded = segment.seg(text);
    seconds = (System.currentTimeMillis() - begin) / (double) 1000;
    System.out.printf("四线程分词速度:%.2f字每秒\n", text.length() / seconds);

    // Both runs must yield the same segmentation, term by term.
    assertEquals(singleThreaded.size(), multiThreaded.size());
    Iterator<Term> expected = singleThreaded.iterator();
    Iterator<Term> actual = multiThreaded.iterator();
    while (expected.hasNext()) {
        Term a = expected.next();
        Term b = actual.next();
        assertEquals(a.word, b.word);
        assertEquals(a.nature, b.nature);
        assertEquals(a.offset, b.offset);
    }
}
Aggregations