
Example 6 with Term

Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

From the class TestSegment, method testViterbi:

public void testViterbi() throws Exception {
    HanLP.Config.enableDebug(true);  // turn on HanLP's debug logging to trace how the sentence is segmented
    CustomDictionary.add("网剧");  // register "网剧" (web series) as a user word before segmenting
    Segment seg = new DijkstraSegment();
    List<Term> termList = seg.seg("优酷总裁魏明介绍了优酷2015年的内容战略,表示要以“大电影、大网剧、大综艺”为关键词");
    System.out.println(termList);
}
Also used: DijkstraSegment (com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment), Term (com.hankcs.hanlp.seg.common.Term), ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm), Segment (com.hankcs.hanlp.seg.Segment), DoubleArrayTrieSegment (com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment), CRFSegment (com.hankcs.hanlp.seg.CRF.CRFSegment), ViterbiSegment (com.hankcs.hanlp.seg.Viterbi.ViterbiSegment)
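
As a quick follow-up to this example, the sketch below (not part of the HanLP test suite) checks whether a word registered via CustomDictionary.add actually appears in a segmentation result. containsWord is a hypothetical helper name; only the public Term.word field used throughout these examples is relied on.

// Sketch: returns true if the segmentation result contains the given surface form,
// e.g. containsWord(termList, "网剧") after running testViterbi above.
public boolean containsWord(List<Term> termList, String word) {
    for (Term term : termList) {
        if (word.equals(term.word)) {
            return true;
        }
    }
    return false;
}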

Example 7 with Term

Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

From the class TestSegment, method testWrapper:

public void testWrapper() throws Exception {
    // Wrap a Reader so the text is segmented incrementally; next() returns null once the input is exhausted.
    SegmentWrapper wrapper = new SegmentWrapper(new BufferedReader(new StringReader("中科院预测科学研究中心学术委员会\nhaha")), StandardTokenizer.SEGMENT);
    Term fullTerm;
    while ((fullTerm = wrapper.next()) != null) {
        System.out.println(fullTerm);
    }
}
Also used: BufferedReader (java.io.BufferedReader), StringReader (java.io.StringReader), Term (com.hankcs.hanlp.seg.common.Term), ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm), SegmentWrapper (com.hankcs.hanlp.seg.common.wrapper.SegmentWrapper)
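
Because SegmentWrapper only needs a Reader, the same pattern can stream a large file without loading it into memory first. A minimal sketch follows, assuming a hypothetical input file corpus.txt; it uses only the SegmentWrapper constructor and next() method shown above, plus java.io.FileReader.

public void segmentFile() throws Exception {
    // "corpus.txt" is an illustrative path, not a file from the HanLP project.
    // (For non-default encodings, wrap a FileInputStream in an InputStreamReader instead.)
    try (BufferedReader reader = new BufferedReader(new FileReader("corpus.txt"))) {
        SegmentWrapper wrapper = new SegmentWrapper(reader, StandardTokenizer.SEGMENT);
        Term term;
        while ((term = wrapper.next()) != null) {
            System.out.print(term.word + " ");
        }
    }
}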

Example 8 with Term

Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

From the class TestSegment, method testIssue199:

public void testIssue199() throws Exception {
    Segment segment = new CRFSegment();
    // Disable the custom dictionary so the CRF model is free to produce words it has never seen.
    segment.enableCustomDictionary(false);
    segment.enablePartOfSpeechTagging(true);
    List<Term> termList = segment.seg("更多采购");
    System.out.println(termList);
    for (Term term : termList) {
        if (term.nature == null) {
            // The test treats a null nature as a newly recognized word; "识别到新词" means "new word recognized".
            System.out.println("识别到新词:" + term.word);
        }
    }
}
Also used: Term (com.hankcs.hanlp.seg.common.Term), ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm), Segment (com.hankcs.hanlp.seg.Segment), DoubleArrayTrieSegment (com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment), CRFSegment (com.hankcs.hanlp.seg.CRF.CRFSegment), DijkstraSegment (com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment), ViterbiSegment (com.hankcs.hanlp.seg.Viterbi.ViterbiSegment)
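
Building on testIssue199, here is a minimal sketch (not from the HanLP repository) that collects the unrecognized words into a list instead of printing them. extractNewWords is a hypothetical helper name; the segment configuration mirrors the test above, and java.util.List plus java.util.LinkedList are the only extra imports.

public List<String> extractNewWords(String text) {
    // Same setup as testIssue199: custom dictionary off, POS tagging on, so dictionary
    // words carry a nature while unseen words come back with nature == null.
    Segment segment = new CRFSegment();
    segment.enableCustomDictionary(false);
    segment.enablePartOfSpeechTagging(true);
    List<String> newWords = new LinkedList<String>();
    for (Term term : segment.seg(text)) {
        if (term.nature == null) {
            newWords.add(term.word);
        }
    }
    return newWords;
}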

Example 9 with Term

Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

From the class Occurrence, method addAll:

public void addAll(List<Term> resultList) {
    //        System.out.println(resultList);
    // Keep only the surface form of each Term; the String[] overload of addAll works on plain words.
    String[] termList = new String[resultList.size()];
    int i = 0;
    for (Term word : resultList) {
        termList[i] = word.word;
        ++i;
    }
    addAll(termList);
}
Also used: Term (com.hankcs.hanlp.seg.common.Term)
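
For context, here is a sketch (not from the repository) of the typical caller side of this overload: segment raw text and hand the resulting Term list to an Occurrence counter. addSentence is a hypothetical helper name, the Occurrence instance is assumed to be constructed elsewhere, and only the addAll(List<Term>) overload above and StandardTokenizer.segment are used.

public void addSentence(Occurrence occurrence, String sentence) {
    // StandardTokenizer.segment returns a List<Term>, matching the overload above.
    List<Term> termList = StandardTokenizer.segment(sentence);
    occurrence.addAll(termList);
}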

Example 10 with Term

Use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

From the class CRFDependencyParser, method parse:

@Override
public CoNLLSentence parse(List<Term> termList) {
    Table table = new Table();
    // One row per term: column 0 = word, 1 = coarse POS (first letter of the tag), 2 = full POS, 3 = CRF prediction.
    table.v = new String[termList.size()][4];
    Iterator<Term> iterator = termList.iterator();
    for (String[] line : table.v) {
        Term term = iterator.next();
        line[0] = term.word;
        line[2] = POSUtil.compilePOS(term.nature);
        line[1] = line[2].substring(0, 1);
    }
    // The CRF model writes its dependency prediction for each row into column 3; it is decoded into a DTag below.
    crfModel.tag(table);
    if (HanLP.Config.DEBUG) {
        System.out.println(table);
    }
    CoNLLWord[] coNLLWordArray = new CoNLLWord[table.size()];
    for (int i = 0; i < coNLLWordArray.length; i++) {
        coNLLWordArray[i] = new CoNLLWord(i + 1, table.v[i][0], table.v[i][2], table.v[i][1]);
    }
    int i = 0;
    for (String[] line : table.v) {
        CRFModelForDependency.DTag dTag = new CRFModelForDependency.DTag(line[3]);
        if (dTag.pos.endsWith("ROOT")) {
            coNLLWordArray[i].HEAD = CoNLLWord.ROOT;
        } else {
            // Convert the predicted offset in the DTag into an absolute index; -1 means no valid head was found.
            int index = convertOffset2Index(dTag, table, i);
            if (index == -1)
                coNLLWordArray[i].HEAD = CoNLLWord.NULL;
            else
                coNLLWordArray[i].HEAD = coNLLWordArray[index];
        }
        ++i;
    }
    // Look up a dependency relation label for each (dependent, head) word/POS pair.
    for (i = 0; i < coNLLWordArray.length; i++) {
        coNLLWordArray[i].DEPREL = BigramDependencyModel.get(coNLLWordArray[i].NAME, coNLLWordArray[i].POSTAG, coNLLWordArray[i].HEAD.NAME, coNLLWordArray[i].HEAD.POSTAG);
    }
    return new CoNLLSentence(coNLLWordArray);
}
Also used: Table (com.hankcs.hanlp.model.crf.Table), CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord), CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence), Term (com.hankcs.hanlp.seg.common.Term)
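
To show how the method above is typically driven, here is a hedged sketch: printDependencies is a hypothetical helper, parser stands for an already constructed CRFDependencyParser (its construction is not shown in this example), and the Chinese sentence is arbitrary sample text. Only the parse(List<Term>) method above and the CoNLLWord fields it populates are used; CoNLLSentence is iterated word by word as in the HanLP dependency-parsing demos.

public void printDependencies(CRFDependencyParser parser) {
    // Arbitrary sample sentence; any Chinese text segmented into Term objects works here.
    List<Term> termList = StandardTokenizer.segment("他送了我一本关于机器学习的书");
    CoNLLSentence sentence = parser.parse(termList);
    for (CoNLLWord word : sentence) {
        // NAME, POSTAG, DEPREL and HEAD are the fields filled in by parse() above.
        System.out.printf("%s/%s --(%s)--> %s%n", word.NAME, word.POSTAG, word.DEPREL, word.HEAD.NAME);
    }
}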

Aggregations (types used together with Term across the indexed examples, with usage counts)

Term (com.hankcs.hanlp.seg.common.Term): 48 usages
Segment (com.hankcs.hanlp.seg.Segment): 12 usages
DijkstraSegment (com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment): 8 usages
LinkedList (java.util.LinkedList): 7 usages
CRFSegment (com.hankcs.hanlp.seg.CRF.CRFSegment): 5 usages
ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm): 5 usages
Vertex (com.hankcs.hanlp.seg.common.Vertex): 5 usages
CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence): 4 usages
CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord): 4 usages
DoubleArrayTrieSegment (com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment): 4 usages
ViterbiSegment (com.hankcs.hanlp.seg.Viterbi.ViterbiSegment): 4 usages
ArrayList (java.util.ArrayList): 4 usages
Nature (com.hankcs.hanlp.corpus.tag.Nature): 3 usages
CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary): 3 usages
AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie): 2 usages
Filter (com.hankcs.hanlp.dictionary.stopword.Filter): 2 usages
Table (com.hankcs.hanlp.model.crf.Table): 2 usages
HMMSegment (com.hankcs.hanlp.seg.HMM.HMMSegment): 2 usages
AtomNode (com.hankcs.hanlp.seg.NShort.Path.AtomNode): 2 usages
File (java.io.File): 2 usages