Search in sources :

Example 1 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

In class TestParse, method testEvaluate.

/**
 * Runs {@code testParse()}, then loads the CoNLL sentences from the dev corpus file,
 * re-parses each one with {@code CRFDependencyParser} and feeds the reference/output
 * pair to an {@code Evaluator}. Progress and per-sentence wall-clock time are printed,
 * and the evaluator itself is printed at the end.
 *
 * @throws Exception if any of the called helpers throws
 */
public void testEvaluate() throws Exception {
    testParse();
    LinkedList<CoNLLSentence> goldSentences = CoNLLLoader.loadSentenceList("D:\\Doc\\语料库\\依存分析训练数据\\THU\\dev.conll");
    Evaluator evaluator = new Evaluator();
    int progress = 0;
    for (CoNLLSentence gold : goldSentences) {
        progress++;
        System.out.printf("%d / %d...", progress, goldSentences.size());
        long begin = System.currentTimeMillis();
        // Rebuild the parser input from the lemma and POS tag of each reference word.
        List<Term> terms = new LinkedList<Term>();
        for (CoNLLWord token : gold.word) {
            Term term = new Term(token.LEMMA, Nature.valueOf(token.POSTAG));
            terms.add(term);
        }
        CoNLLSentence predicted = CRFDependencyParser.compute(terms);
        evaluator.e(gold, predicted);
        long elapsedMillis = System.currentTimeMillis() - begin;
        System.out.println("done in " + elapsedMillis + " ms.");
    }
    System.out.println(evaluator);
}
Also used : CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) Term(com.hankcs.hanlp.seg.common.Term) Evaluator(com.hankcs.hanlp.corpus.dependency.CoNll.Evaluator) LinkedList(java.util.LinkedList)

Example 2 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

In class TestParse, method testCrfParser.

/**
 * Enables HanLP debug mode, builds a fixed seven-term sentence with hand-assigned
 * natures and prints the result of {@code CRFDependencyParser.compute} for it.
 *
 * @throws Exception if the parser throws
 */
public void testCrfParser() throws Exception {
    HanLP.Config.enableDebug();
    // The sentence, in order, as (word, nature) pairs.
    Term[] tokens = {
        new Term("坚决", Nature.ad),
        new Term("惩治", Nature.v),
        new Term("贪污", Nature.v),
        new Term("贿赂", Nature.n),
        new Term("等", Nature.udeng),
        new Term("经济", Nature.n),
        new Term("犯罪", Nature.vn)
    };
    List<Term> sentence = new LinkedList<Term>();
    for (Term token : tokens) {
        sentence.add(token);
    }
    System.out.println(CRFDependencyParser.compute(sentence));
}
Also used : Term(com.hankcs.hanlp.seg.common.Term) LinkedList(java.util.LinkedList)

Example 3 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

In class TestDijkstra, method testSeg.

/**
 * Segments a short fixed phrase with a fresh {@code DijkstraSegment} and prints
 * the resulting term list.
 *
 * @throws Exception if segmentation throws
 */
public void testSeg() throws Exception {
    DijkstraSegment dijkstra = new DijkstraSegment();
    List<Term> terms = dijkstra.seg("商品与服务");
    System.out.println(terms);
}
Also used : DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) Term(com.hankcs.hanlp.seg.common.Term)

Example 4 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

In class TestPersonRecognition, method testBatch.

/**
 * Walks every file returned by {@code FolderWalker.open(FOLDER)}, splits its text into
 * sentences with a {@code DijkstraSegment}, and prints each sentence for which
 * {@code SentencesUtil.hasNature(sentence, Nature.nr)} is true.
 * NOTE(review): {@code Nature.nr} is presumably the person-name tag, given the
 * enclosing test class name; confirm against the Nature definition.
 *
 * @throws Exception if reading or segmenting a file throws
 */
public void testBatch() throws Exception {
    List<File> files = FolderWalker.open(FOLDER);
    int processed = 0;
    for (File current : files) {
        processed++;
        // Progress line: "<index> / <total> <file name> ".
        System.out.println(processed + " / " + files.size() + " " + current.getName() + " ");
        String text = IOUtil.readTxt(current.getAbsolutePath());
        DijkstraSegment dijkstra = new DijkstraSegment();
        for (List<Term> terms : dijkstra.seg2sentence(text)) {
            if (!SentencesUtil.hasNature(terms, Nature.nr)) {
                continue;
            }
            System.out.println(terms);
        }
    }
}
Also used : DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) List(java.util.List) Term(com.hankcs.hanlp.seg.common.Term) File(java.io.File)

Example 5 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

In class TestSegment, method testMultiThreading.

/**
 * Segments a large repeated text twice with {@code BasicTokenizer.SEGMENT} — once as-is
 * and once after {@code enableMultithreading(4)} — printing the characters-per-second
 * throughput of each run, then asserts that both runs produced the same number of terms
 * with pairwise-equal word, nature and offset.
 *
 * @throws Exception if segmentation throws
 */
public void testMultiThreading() throws Exception {
    Segment segment = BasicTokenizer.SEGMENT;
    // Quick speed measurement.
    String sample = "江西鄱阳湖干枯,中国最大淡水湖变成大草原。";
    System.out.println(segment.seg(sample));

    // Build the workload by repeating the sample sentence.
    int pressure = 100000;
    StringBuilder bigText = new StringBuilder(sample.length() * pressure);
    for (int round = 0; round < pressure; round++) {
        bigText.append(sample);
    }
    String text = bigText.toString();

    long begin = System.currentTimeMillis();
    List<Term> singleThreaded = segment.seg(text);
    double seconds = (System.currentTimeMillis() - begin) / 1000.0;
    System.out.printf("单线程分词速度:%.2f字每秒\n", text.length() / seconds);

    segment.enableMultithreading(4);
    begin = System.currentTimeMillis();
    List<Term> multiThreaded = segment.seg(text);
    seconds = (System.currentTimeMillis() - begin) / 1000.0;
    System.out.printf("四线程分词速度:%.2f字每秒\n", text.length() / seconds);

    // Sizes are checked first, so walking both iterators in lockstep is safe.
    assertEquals(singleThreaded.size(), multiThreaded.size());
    Iterator<Term> expectedIt = singleThreaded.iterator();
    Iterator<Term> actualIt = multiThreaded.iterator();
    for (; expectedIt.hasNext(); ) {
        Term expected = expectedIt.next();
        Term actual = actualIt.next();
        assertEquals(expected.word, actual.word);
        assertEquals(expected.nature, actual.nature);
        assertEquals(expected.offset, actual.offset);
    }
}
Also used : Term(com.hankcs.hanlp.seg.common.Term) ResultTerm(com.hankcs.hanlp.seg.common.ResultTerm) Segment(com.hankcs.hanlp.seg.Segment) DoubleArrayTrieSegment(com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment) CRFSegment(com.hankcs.hanlp.seg.CRF.CRFSegment) DijkstraSegment(com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) ViterbiSegment(com.hankcs.hanlp.seg.Viterbi.ViterbiSegment)

Aggregations

Term (com.hankcs.hanlp.seg.common.Term) 48; Segment (com.hankcs.hanlp.seg.Segment) 12; DijkstraSegment (com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) 8; LinkedList (java.util.LinkedList) 7; CRFSegment (com.hankcs.hanlp.seg.CRF.CRFSegment) 5; ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm) 5; Vertex (com.hankcs.hanlp.seg.common.Vertex) 5; CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) 4; CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) 4; DoubleArrayTrieSegment (com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment) 4; ViterbiSegment (com.hankcs.hanlp.seg.Viterbi.ViterbiSegment) 4; ArrayList (java.util.ArrayList) 4; Nature (com.hankcs.hanlp.corpus.tag.Nature) 3; CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary) 3; AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie) 2; Filter (com.hankcs.hanlp.dictionary.stopword.Filter) 2; Table (com.hankcs.hanlp.model.crf.Table) 2; HMMSegment (com.hankcs.hanlp.seg.HMM.HMMSegment) 2; AtomNode (com.hankcs.hanlp.seg.NShort.Path.AtomNode) 2; File (java.io.File) 2