
Example 46 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

the class DemoCRFSegment method main.

public static void main(String[] args) {
    // Turn off part-of-speech (nature) display
    HanLP.Config.ShowTermNature = false;
    Segment segment = new CRFSegment().enableCustomDictionary(false);
    String[] sentenceArray = new String[] {
        "HanLP是由一系列模型与算法组成的Java工具包,目标是普及自然语言处理在生产环境中的应用。",
        // Traditional Chinese poses no problem
        "鐵桿部隊憤怒情緒集結 馬英九腹背受敵",
        "馬英九回應連勝文“丐幫說”:稱黨內同志談話應謹慎",
        // Some ability to recognize technical terms
        "高锰酸钾,强氧化剂,紫红色晶体,可溶于水,遇乙醇即被还原。常用作消毒剂、水净化剂、氧化剂、漂白剂、毒气吸收剂、二氧化碳精制剂等。",
        // Non-news corpus
        "《夜晚的骰子》通过描述浅草的舞女在暗夜中扔骰子的情景,寄托了作者对庶民生活区的情感",
        // Weibo (microblog) posts
        "这个像是真的[委屈]前面那个打扮太江户了,一点不上品...@hankcs",
        "鼎泰丰的小笼一点味道也没有...每样都淡淡的...淡淡的,哪有食堂2A的好次",
        "克里斯蒂娜·克罗尔说:不,我不是虎妈。我全家都热爱音乐,我也鼓励他们这么做。",
        "今日APPS:Sago Mini Toolbox培养孩子动手能力",
        "财政部副部长王保安调任国家统计局党组书记",
        "2.34米男子娶1.53米女粉丝 称夫妻生活没问题",
        "你看过穆赫兰道吗",
        "国办发布网络提速降费十四条指导意见 鼓励流量不清零",
        "乐视超级手机能否承载贾布斯的生态梦"
    };
    for (String sentence : sentenceArray) {
        List<Term> termList = segment.seg(sentence);
        System.out.println(termList);
    }
    /**
     * Memory cookbook:
     * HanLP keeps a smart internal memory pool. For the same CRF model (identified by its model
     * file path), the model is not reloaded as long as it has not been released or memory is
     * sufficient.
     */
    for (int i = 0; i < 5; ++i) {
        segment = new CRFSegment();
    }
}
Also used : Term(com.hankcs.hanlp.seg.common.Term) Segment(com.hankcs.hanlp.seg.Segment) CRFSegment(com.hankcs.hanlp.seg.CRF.CRFSegment)
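A common follow-up is to keep only the surface strings from the returned List<Term>. The sketch below is a minimal example, assuming the public word field on Term (as read throughout the HanLP sources) is the intended way to access the surface form; the class name TermWordsDemo is invented for illustration:

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;

import java.util.ArrayList;
import java.util.List;

public class TermWordsDemo {
    public static void main(String[] args) {
        Segment segment = HanLP.newSegment();
        // Segment a sentence and collect only the surface strings, dropping the natures
        List<Term> termList = segment.seg("商品和服务");
        List<String> words = new ArrayList<String>();
        for (Term term : termList) {
            words.add(term.word);
        }
        System.out.println(words);
    }
}

Only HanLP.newSegment(), Segment.seg(String) and Term.word come from the library; everything else is scaffolding for the example.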

Example 47 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

the class DemoJapaneseNameRecognition method main.

public static void main(String[] args) {
    String[] testCase = new String[] { "北川景子参演了林诣彬导演的《速度与激情3》", "林志玲亮相网友:确定不是波多野结衣?", "龟山千广和近藤公园在龟山公园里喝酒赏花" };
    // Japanese name recognition is off in the standard segmenter by default; enable it explicitly
    Segment segment = HanLP.newSegment().enableJapaneseNameRecognize(true);
    for (String sentence : testCase) {
        List<Term> termList = segment.seg(sentence);
        System.out.println(termList);
    }
}
Also used : Term(com.hankcs.hanlp.seg.common.Term) Segment(com.hankcs.hanlp.seg.Segment)
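Once recognition is enabled, the recognized names can be picked out of the result by their nature tag. The sketch below is an illustrative filter, assuming Japanese person names come back tagged nrj as in HanLP's default tag set; the class name JapaneseNameFilterDemo is invented for the example:

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;

import java.util.List;

public class JapaneseNameFilterDemo {
    public static void main(String[] args) {
        Segment segment = HanLP.newSegment().enableJapaneseNameRecognize(true);
        List<Term> termList = segment.seg("北川景子参演了林诣彬导演的《速度与激情3》");
        for (Term term : termList) {
            // Keep only terms whose nature reads as a Japanese person name (assumed tag: nrj)
            if (term.nature != null && "nrj".equals(term.nature.toString())) {
                System.out.println(term.word);
            }
        }
    }
}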

Example 48 with Term

use of com.hankcs.hanlp.seg.common.Term in project HanLP by hankcs.

the class MinimumSpanningTreeParser method parse.

@Override
public CoNLLSentence parse(List<Term> termList) {
    if (termList == null || termList.size() == 0)
        return null;
    // Prepend the virtual root node so every real word can attach to a head
    termList.add(0, new Term("##核心##", Nature.begin));
    Node[] nodeArray = new Node[termList.size()];
    Iterator<Term> iterator = termList.iterator();
    for (int i = 0; i < nodeArray.length; ++i) {
        nodeArray[i] = new Node(iterator.next(), i);
    }
    Edge[][] edges = new Edge[nodeArray.length][nodeArray.length];
    for (int i = 0; i < edges.length; ++i) {
        for (int j = 0; j < edges[i].length; ++j) {
            if (i != j) {
                edges[j][i] = makeEdge(nodeArray, i, j);
            }
        }
    }
    // Prim's algorithm for the minimum spanning tree
    int max_v = nodeArray.length * (nodeArray.length - 1);
    float[] mincost = new float[max_v];
    Arrays.fill(mincost, Float.MAX_VALUE / 3);
    boolean[] used = new boolean[max_v];
    Arrays.fill(used, false);
    used[0] = true;
    PriorityQueue<State> que = new PriorityQueue<State>();
    // Find the only child of the virtual root
    float minCostToRoot = Float.MAX_VALUE;
    Edge firstEdge = null;
    Edge[] edgeResult = new Edge[termList.size() - 1];
    for (Edge edge : edges[0]) {
        if (edge == null)
            continue;
        if (minCostToRoot > edge.cost) {
            firstEdge = edge;
            minCostToRoot = edge.cost;
        }
    }
    if (firstEdge == null)
        return null;
    que.add(new State(minCostToRoot, firstEdge.from, firstEdge));
    while (!que.isEmpty()) {
        State p = que.poll();
        int v = p.id;
        if (used[v] || p.cost > mincost[v])
            continue;
        used[v] = true;
        if (p.edge != null) {
            //                System.out.println(p.edge.from + " " + p.edge.to + p.edge.label);
            edgeResult[p.edge.from - 1] = p.edge;
        }
        for (Edge e : edges[v]) {
            if (e == null)
                continue;
            if (mincost[e.from] > e.cost) {
                mincost[e.from] = e.cost;
                que.add(new State(mincost[e.from], e.from, e));
            }
        }
    }
    CoNLLWord[] wordArray = new CoNLLWord[termList.size() - 1];
    for (int i = 0; i < wordArray.length; ++i) {
        wordArray[i] = new CoNLLWord(i + 1, nodeArray[i + 1].word, nodeArray[i + 1].label);
        wordArray[i].DEPREL = edgeResult[i].label;
    }
    for (int i = 0; i < edgeResult.length; ++i) {
        int index = edgeResult[i].to - 1;
        if (index < 0) {
            wordArray[i].HEAD = CoNLLWord.ROOT;
            continue;
        }
        wordArray[i].HEAD = wordArray[index];
    }
    return new CoNLLSentence(wordArray);
}
Also used : Node(com.hankcs.hanlp.dependency.common.Node) Term(com.hankcs.hanlp.seg.common.Term) PriorityQueue(java.util.PriorityQueue) State(com.hankcs.hanlp.dependency.common.State) CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) Edge(com.hankcs.hanlp.dependency.common.Edge)
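parse builds the dependency tree over the Term list, but callers rarely invoke MinimumSpanningTreeParser directly, since makeEdge is defined elsewhere in the class hierarchy. A minimal way to obtain and walk a CoNLLSentence is through the library's top-level entry point, sketched below; it assumes HanLP.parseDependency(String) and iteration over CoNLLSentence behave as in the HanLP demos, and the sample sentence is arbitrary:

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence;
import com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord;

public class DependencyWalkDemo {
    public static void main(String[] args) {
        // Parse a sentence into a dependency tree in CoNLL form
        CoNLLSentence sentence = HanLP.parseDependency("徐先生还具体帮助他确定了把画雄鹰、松鼠和麻雀作为主攻目标。");
        // Print each word with the relation to its head; a root word's head is CoNLLWord.ROOT
        for (CoNLLWord word : sentence) {
            System.out.printf("%s --(%s)--> %s%n", word.LEMMA, word.DEPREL, word.HEAD.LEMMA);
        }
    }
}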

Aggregations

Term (com.hankcs.hanlp.seg.common.Term) 48
Segment (com.hankcs.hanlp.seg.Segment) 12
DijkstraSegment (com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment) 8
LinkedList (java.util.LinkedList) 7
CRFSegment (com.hankcs.hanlp.seg.CRF.CRFSegment) 5
ResultTerm (com.hankcs.hanlp.seg.common.ResultTerm) 5
Vertex (com.hankcs.hanlp.seg.common.Vertex) 5
CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) 4
CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) 4
DoubleArrayTrieSegment (com.hankcs.hanlp.seg.Other.DoubleArrayTrieSegment) 4
ViterbiSegment (com.hankcs.hanlp.seg.Viterbi.ViterbiSegment) 4
ArrayList (java.util.ArrayList) 4
Nature (com.hankcs.hanlp.corpus.tag.Nature) 3
CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary) 3
AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie) 2
Filter (com.hankcs.hanlp.dictionary.stopword.Filter) 2
Table (com.hankcs.hanlp.model.crf.Table) 2
HMMSegment (com.hankcs.hanlp.seg.HMM.HMMSegment) 2
AtomNode (com.hankcs.hanlp.seg.NShort.Path.AtomNode) 2
File (java.io.File) 2