Example 1 with Vertex

Use of com.hankcs.hanlp.seg.common.Vertex in the HanLP project by hankcs.

The class Dijkstra, method compute.

public static List<Vertex> compute(Graph graph) {
    List<Vertex> resultList = new LinkedList<Vertex>();
    Vertex[] vertexes = graph.getVertexes();
    List<EdgeFrom>[] edgesTo = graph.getEdgesTo();
    double[] d = new double[vertexes.length];
    Arrays.fill(d, Double.MAX_VALUE);
    d[d.length - 1] = 0;
    int[] path = new int[vertexes.length];
    Arrays.fill(path, -1);
    PriorityQueue<State> que = new PriorityQueue<State>();
    que.add(new State(0, vertexes.length - 1));
    while (!que.isEmpty()) {
        State p = que.poll();
        if (d[p.vertex] < p.cost)
            continue;
        for (EdgeFrom edgeFrom : edgesTo[p.vertex]) {
            if (d[edgeFrom.from] > d[p.vertex] + edgeFrom.weight) {
                d[edgeFrom.from] = d[p.vertex] + edgeFrom.weight;
                que.add(new State(d[edgeFrom.from], edgeFrom.from));
                path[edgeFrom.from] = p.vertex;
            }
        }
    }
    for (int t = 0; t != -1; t = path[t]) {
        resultList.add(vertexes[t]);
    }
    return resultList;
}
Also used: Vertex (com.hankcs.hanlp.seg.common.Vertex), EdgeFrom (com.hankcs.hanlp.seg.common.EdgeFrom), State (com.hankcs.hanlp.seg.Dijkstra.Path.State), PriorityQueue (java.util.PriorityQueue), LinkedList (java.util.LinkedList), List (java.util.List)
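
HanLP runs this Dijkstra backwards: distances are seeded at the last vertex, each popped state relaxes the incoming edges stored in edgesTo, and path[] is then walked forward from vertex 0 to read off the route. Below is a minimal standalone sketch of the same backward-relaxation pattern on a toy graph; the EdgeFrom and State records are simplified stand-ins for illustration, not HanLP's classes.

import java.util.*;

public class BackwardDijkstraSketch {
    // Simplified stand-in for HanLP's EdgeFrom: an edge arriving at a vertex,
    // recording where it came from and its weight.
    record EdgeFrom(int from, double weight) {}

    // Queue entry ordered by accumulated cost, like HanLP's State.
    record State(double cost, int vertex) implements Comparable<State> {
        public int compareTo(State o) { return Double.compare(cost, o.cost); }
    }

    /** Shortest path from vertex 0 to the last vertex, relaxing incoming edges from the sink. */
    static List<Integer> compute(List<EdgeFrom>[] edgesTo) {
        int n = edgesTo.length;
        double[] d = new double[n];
        Arrays.fill(d, Double.MAX_VALUE);
        d[n - 1] = 0;                               // start at the sink, as HanLP does
        int[] path = new int[n];
        Arrays.fill(path, -1);
        PriorityQueue<State> que = new PriorityQueue<>();
        que.add(new State(0, n - 1));
        while (!que.isEmpty()) {
            State p = que.poll();
            if (d[p.vertex()] < p.cost()) continue; // stale queue entry
            for (EdgeFrom e : edgesTo[p.vertex()]) {
                if (d[e.from()] > d[p.vertex()] + e.weight()) {
                    d[e.from()] = d[p.vertex()] + e.weight();
                    que.add(new State(d[e.from()], e.from()));
                    path[e.from()] = p.vertex();    // next hop towards the sink
                }
            }
        }
        List<Integer> result = new ArrayList<>();
        for (int t = 0; t != -1; t = path[t]) result.add(t); // walk source -> sink
        return result;
    }

    public static void main(String[] args) {
        // Toy graph: 0->1 (1.0), 0->2 (4.0), 1->2 (1.0), 1->3 (5.0), 2->3 (1.0)
        @SuppressWarnings("unchecked")
        List<EdgeFrom>[] edgesTo = new List[4];
        for (int i = 0; i < edgesTo.length; i++) edgesTo[i] = new ArrayList<>();
        edgesTo[1].add(new EdgeFrom(0, 1.0));
        edgesTo[2].add(new EdgeFrom(0, 4.0));
        edgesTo[2].add(new EdgeFrom(1, 1.0));
        edgesTo[3].add(new EdgeFrom(1, 5.0));
        edgesTo[3].add(new EdgeFrom(2, 1.0));
        System.out.println(compute(edgesTo));       // prints [0, 1, 2, 3]
    }
}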

Example 2 with Vertex

Use of com.hankcs.hanlp.seg.common.Vertex in the HanLP project by hankcs.

The class Viterbi, method compute.

/**
     * Specialized solver for the HMM model
     *
     * @param vertexList                path containing the Vertex.B node
     * @param transformMatrixDictionary transition matrix corresponding to the dictionary
     */
public static void compute(List<Vertex> vertexList, TransformMatrixDictionary<Nature> transformMatrixDictionary) {
    int length = vertexList.size() - 1;
    // rolling array
    double[][] cost = new double[2][];
    Iterator<Vertex> iterator = vertexList.iterator();
    Vertex start = iterator.next();
    Nature pre = start.attribute.nature[0];
    // the first tag is fixed
    //        start.confirmNature(pre);
    // the second one can also be computed directly
    Vertex preItem;
    Nature[] preTagSet;
    {
        Vertex item = iterator.next();
        cost[0] = new double[item.attribute.nature.length];
        int j = 0;
        int curIndex = 0;
        for (Nature cur : item.attribute.nature) {
            cost[0][j] = transformMatrixDictionary.transititon_probability[pre.ordinal()][cur.ordinal()] - Math.log((item.attribute.frequency[curIndex] + 1e-8) / transformMatrixDictionary.getTotalFrequency(cur));
            ++j;
            ++curIndex;
        }
        preTagSet = item.attribute.nature;
        preItem = item;
    }
    // from the third one onward it gets more involved
    for (int i = 1; i < length; ++i) {
        int index_i = i & 1;
        int index_i_1 = 1 - index_i;
        Vertex item = iterator.next();
        cost[index_i] = new double[item.attribute.nature.length];
        double perfect_cost_line = Double.MAX_VALUE;
        int k = 0;
        Nature[] curTagSet = item.attribute.nature;
        for (Nature cur : curTagSet) {
            cost[index_i][k] = Double.MAX_VALUE;
            int j = 0;
            for (Nature p : preTagSet) {
                double now = cost[index_i_1][j] + transformMatrixDictionary.transititon_probability[p.ordinal()][cur.ordinal()] - Math.log((item.attribute.frequency[k] + 1e-8) / transformMatrixDictionary.getTotalFrequency(cur));
                if (now < cost[index_i][k]) {
                    cost[index_i][k] = now;
                    if (now < perfect_cost_line) {
                        perfect_cost_line = now;
                        pre = p;
                    }
                }
                ++j;
            }
            ++k;
        }
        preItem.confirmNature(pre);
        preTagSet = curTagSet;
        preItem = item;
    }
}
Also used: Nature (com.hankcs.hanlp.corpus.tag.Nature), Vertex (com.hankcs.hanlp.seg.common.Vertex)
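
The core trick above is the rolling array: only two rows of cost are kept (indexed by i & 1 and 1 - (i & 1)), and the previous vertex's nature is confirmed greedily at each position. For comparison, here is a minimal standalone sketch of a rolling-array Viterbi over a generic tag lattice that keeps explicit backpointers for exact path recovery; the trans/emit matrices and method name are illustrative assumptions, not HanLP's TransformMatrixDictionary API.

import java.util.Arrays;

public class RollingViterbiSketch {
    /**
     * Minimum-cost tag sequence under a first-order model, keeping only two cost rows,
     * the same rolling-array trick used in HanLP's Viterbi.compute above.
     *
     * @param trans    trans[p][c]: cost of moving from tag p to tag c (e.g. a negative log probability)
     * @param emit     emit[t][c]: cost of observing token t under tag c
     * @param startTag tag index of the fixed start token
     * @return chosen tag index for each token
     */
    static int[] decode(double[][] trans, double[][] emit, int startTag) {
        int n = emit.length, tags = trans.length;
        double[][] cost = new double[2][tags];       // rolling array: rows i and i-1 only
        int[][] back = new int[n][tags];             // backpointers for exact path recovery
        for (int c = 0; c < tags; c++) {
            cost[0][c] = trans[startTag][c] + emit[0][c];
            back[0][c] = startTag;
        }
        for (int i = 1; i < n; i++) {
            int cur = i & 1, prev = 1 - cur;         // flip between the two rows
            for (int c = 0; c < tags; c++) {
                cost[cur][c] = Double.MAX_VALUE;
                for (int p = 0; p < tags; p++) {
                    double now = cost[prev][p] + trans[p][c] + emit[i][c];
                    if (now < cost[cur][c]) {
                        cost[cur][c] = now;
                        back[i][c] = p;
                    }
                }
            }
        }
        int last = (n - 1) & 1, bestTag = 0;         // pick the best final tag, then follow backpointers
        for (int c = 1; c < tags; c++)
            if (cost[last][c] < cost[last][bestTag]) bestTag = c;
        int[] result = new int[n];
        for (int i = n - 1; i >= 0; i--) {
            result[i] = bestTag;
            bestTag = back[i][bestTag];
        }
        return result;
    }

    public static void main(String[] args) {
        double[][] trans = { { 0.2, 1.0 }, { 1.0, 0.2 } };            // staying in the same tag is cheap
        double[][] emit = { { 0.1, 2.0 }, { 0.1, 2.0 }, { 2.0, 0.1 } };
        System.out.println(Arrays.toString(decode(trans, emit, 0)));  // prints [0, 0, 1]
    }
}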

Example 3 with Vertex

Use of com.hankcs.hanlp.seg.common.Vertex in the HanLP project by hankcs.

The class Segment, method combineByCustomDictionary.

/**
     * Merge the coarse segmentation result using the user dictionary, and collect the user words into the full word graph
     * @param vertexList coarse segmentation result
     * @param wordNetAll full word graph into which the user words are collected
     * @return the merged result
     */
protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList, WordNet wordNetAll) {
    Vertex[] wordNet = new Vertex[vertexList.size()];
    vertexList.toArray(wordNet);
    // merge via DoubleArrayTrie (DAT)
    int line = 1;
    DoubleArrayTrie<CoreDictionary.Attribute> dat = CustomDictionary.dat;
    for (int i = 0; i < wordNet.length; ++i) {
        int state = 1;
        state = dat.transition(wordNet[i].realWord, state);
        if (state > 0) {
            int to = i + 1;
            int end = to;
            CoreDictionary.Attribute value = dat.output(state);
            for (; to < wordNet.length; ++to) {
                state = dat.transition(wordNet[to].realWord, state);
                if (state < 0)
                    break;
                CoreDictionary.Attribute output = dat.output(state);
                if (output != null) {
                    value = output;
                    end = to + 1;
                    combineWords(wordNet, i, end, value);
                    wordNetAll.add(line, wordNet[i]);
                }
            }
            if (value != null) {
                line += wordNet[i].realWord.length();
                i = end - 1;
            }
        } else {
            line += wordNet[i].realWord.length();
        }
    }
    // merge via BinTrie
    if (CustomDictionary.trie != null) {
        line = 1;
        for (int i = 0; i < wordNet.length; ++i) {
            if (wordNet[i] == null)
                continue;
            BaseNode<CoreDictionary.Attribute> state = CustomDictionary.trie.transition(wordNet[i].realWord.toCharArray(), 0);
            if (state != null) {
                int to = i + 1;
                int end = to;
                CoreDictionary.Attribute value = state.getValue();
                for (; to < wordNet.length; ++to) {
                    if (wordNet[to] == null)
                        continue;
                    state = state.transition(wordNet[to].realWord.toCharArray(), 0);
                    if (state == null)
                        break;
                    if (state.getValue() != null) {
                        value = state.getValue();
                        end = to + 1;
                        combineWords(wordNet, i, end, value);
                        wordNetAll.add(line, wordNet[i]);
                    }
                }
                if (value != null) {
                    line += wordNet[i].realWord.length();
                    i = end - 1;
                }
            } else {
                line += wordNet[i].realWord.length();
            }
        }
    }
    vertexList.clear();
    for (Vertex vertex : wordNet) {
        if (vertex != null)
            vertexList.add(vertex);
    }
    return vertexList;
}
Also used: Vertex (com.hankcs.hanlp.seg.common.Vertex), CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary)
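
Compared with the overload in Example 4 below, this variant also records each merged user word into wordNetAll. From application code the merge is normally reached through the public segmentation API; a minimal usage sketch, assuming the standard HanLP 1.x entry points (CustomDictionary.add, HanLP.newSegment, Segment.enableCustomDictionary):

import com.hankcs.hanlp.HanLP;
import com.hankcs.hanlp.dictionary.CustomDictionary;
import com.hankcs.hanlp.seg.Segment;

public class CustomDictionaryDemo {
    public static void main(String[] args) {
        // Register a user word at runtime so the DAT/BinTrie merge above can pick it up.
        CustomDictionary.add("攻城狮");
        // Build a segmenter with the custom dictionary enabled and segment a sentence.
        Segment segment = HanLP.newSegment().enableCustomDictionary(true);
        System.out.println(segment.seg("攻城狮逆袭"));
    }
}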

Example 4 with Vertex

Use of com.hankcs.hanlp.seg.common.Vertex in the HanLP project by hankcs.

The class Segment, method combineByCustomDictionary.

/**
     * Merge the coarse segmentation result using the user dictionary
     * @param vertexList coarse segmentation result
     * @return the merged result
     */
protected static List<Vertex> combineByCustomDictionary(List<Vertex> vertexList) {
    Vertex[] wordNet = new Vertex[vertexList.size()];
    vertexList.toArray(wordNet);
    // merge via DoubleArrayTrie (DAT)
    DoubleArrayTrie<CoreDictionary.Attribute> dat = CustomDictionary.dat;
    for (int i = 0; i < wordNet.length; ++i) {
        int state = 1;
        state = dat.transition(wordNet[i].realWord, state);
        if (state > 0) {
            int to = i + 1;
            int end = to;
            CoreDictionary.Attribute value = dat.output(state);
            for (; to < wordNet.length; ++to) {
                state = dat.transition(wordNet[to].realWord, state);
                if (state < 0)
                    break;
                CoreDictionary.Attribute output = dat.output(state);
                if (output != null) {
                    value = output;
                    end = to + 1;
                }
            }
            if (value != null) {
                combineWords(wordNet, i, end, value);
                i = end - 1;
            }
        }
    }
    // merge via BinTrie
    if (CustomDictionary.trie != null) {
        for (int i = 0; i < wordNet.length; ++i) {
            if (wordNet[i] == null)
                continue;
            BaseNode<CoreDictionary.Attribute> state = CustomDictionary.trie.transition(wordNet[i].realWord.toCharArray(), 0);
            if (state != null) {
                int to = i + 1;
                int end = to;
                CoreDictionary.Attribute value = state.getValue();
                for (; to < wordNet.length; ++to) {
                    if (wordNet[to] == null)
                        continue;
                    state = state.transition(wordNet[to].realWord.toCharArray(), 0);
                    if (state == null)
                        break;
                    if (state.getValue() != null) {
                        value = state.getValue();
                        end = to + 1;
                    }
                }
                if (value != null) {
                    combineWords(wordNet, i, end, value);
                    i = end - 1;
                }
            }
        }
    }
    vertexList.clear();
    for (Vertex vertex : wordNet) {
        if (vertex != null)
            vertexList.add(vertex);
    }
    return vertexList;
}
Also used: Vertex (com.hankcs.hanlp.seg.common.Vertex), CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary)
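
Whether the DAT or the BinTrie is used, the loop follows the same greedy pattern: from each surviving slot, extend over the following slots while the dictionary still accepts the growing prefix, remember the longest complete match, then merge that span into one vertex and skip past it. Here is a simplified standalone sketch of that merge on plain strings, using a Set as a stand-in dictionary (and scanning without prefix pruning, where HanLP breaks as soon as the trie transition fails):

import java.util.*;

public class LongestMatchMergeSketch {
    /**
     * Merge adjacent tokens into the longest phrase found in a user dictionary,
     * mirroring the greedy forward scan of combineByCustomDictionary.
     */
    static List<String> merge(List<String> tokens, Set<String> dictionary) {
        String[] slots = tokens.toArray(new String[0]);
        for (int i = 0; i < slots.length; i++) {
            if (slots[i] == null) continue;                     // already absorbed by an earlier merge
            StringBuilder phrase = new StringBuilder(slots[i]);
            int end = i + 1;                                    // exclusive end of the longest match so far
            for (int to = i + 1; to < slots.length; to++) {
                if (slots[to] == null) continue;
                phrase.append(slots[to]);
                if (dictionary.contains(phrase.toString())) end = to + 1;
            }
            if (end > i + 1) {
                // Combine slots [i, end) into one token and null out the absorbed ones,
                // playing the role of combineWords on Vertex objects.
                StringBuilder combined = new StringBuilder();
                for (int k = i; k < end; k++) {
                    combined.append(slots[k]);
                    if (k > i) slots[k] = null;
                }
                slots[i] = combined.toString();
                i = end - 1;
            }
        }
        List<String> result = new ArrayList<>();
        for (String s : slots) if (s != null) result.add(s);
        return result;
    }

    public static void main(String[] args) {
        Set<String> dict = Set.of("南京市", "长江大桥");
        System.out.println(merge(List.of("南京", "市", "长江", "大桥"), dict)); // prints [南京市, 长江大桥]
    }
}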

Example 5 with Vertex

Use of com.hankcs.hanlp.seg.common.Vertex in the HanLP project by hankcs.

The class Segment, method removeFromWordNet.

/**
     * Completely erase a word from the word net
     * @param cur the word
     * @param wordNetAll the word net
     * @param line the line (offset) currently being scanned
     * @param length the length of the current buffer
     */
private static void removeFromWordNet(Vertex cur, WordNet wordNetAll, int line, int length) {
    LinkedList<Vertex>[] vertexes = wordNetAll.getVertexes();
    // remove it from the wordNet
    for (Vertex vertex : vertexes[line + length]) {
        if (vertex.from == cur)
            vertex.from = null;
    }
    ListIterator<Vertex> iterator = vertexes[line + length - cur.realWord.length()].listIterator();
    while (iterator.hasNext()) {
        Vertex vertex = iterator.next();
        if (vertex == cur)
            iterator.remove();
    }
}
Also used: Vertex (com.hankcs.hanlp.seg.common.Vertex)
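
Here line + length is the character offset just past the end of cur, and line + length - cur.realWord.length() is the row where cur starts, so the method first clears any successor whose from pointer still references cur and then deletes cur from its own row. A minimal sketch of those two steps on a simplified word net indexed directly by start offset; the Word class and row layout are hypothetical stand-ins for HanLP's Vertex and WordNet:

import java.util.*;

public class WordNetRemovalSketch {
    // Hypothetical stand-in for HanLP's Vertex: the word text plus the predecessor chosen so far.
    static class Word {
        final String text;
        Word from;
        Word(String text) { this.text = text; }
        public String toString() { return text; }
    }

    /** Erase cur (starting at offset start) from rows[], where rows[i] holds the words beginning at offset i. */
    static void remove(List<Word>[] rows, Word cur, int start) {
        int end = start + cur.text.length();
        if (end < rows.length) {
            for (Word successor : rows[end]) {
                if (successor.from == cur) successor.from = null; // break dangling back-pointers
            }
        }
        rows[start].removeIf(w -> w == cur);                      // drop the word from its own row
    }

    public static void main(String[] args) {
        @SuppressWarnings("unchecked")
        List<Word>[] rows = new List[4];
        for (int i = 0; i < rows.length; i++) rows[i] = new ArrayList<>();
        Word ab = new Word("ab");
        Word c = new Word("c");
        c.from = ab;                                              // c currently chains back to ab
        rows[0].add(ab);
        rows[2].add(c);
        remove(rows, ab, 0);
        System.out.println(rows[0] + " " + (c.from == null));     // prints [] true
    }
}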

Aggregations

Vertex (com.hankcs.hanlp.seg.common.Vertex): 33
EnumItem (com.hankcs.hanlp.corpus.dictionary.item.EnumItem): 6
LinkedList (java.util.LinkedList): 6
CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary): 5
Term (com.hankcs.hanlp.seg.common.Term): 5
AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie): 4
NS (com.hankcs.hanlp.corpus.tag.NS): 4
Nature (com.hankcs.hanlp.corpus.tag.Nature): 4
NR (com.hankcs.hanlp.corpus.tag.NR): 3
NT (com.hankcs.hanlp.corpus.tag.NT): 3
Graph (com.hankcs.hanlp.seg.common.Graph): 2
WordNet (com.hankcs.hanlp.seg.common.WordNet): 2
DoubleArrayTrie (com.hankcs.hanlp.collection.trie.DoubleArrayTrie): 1
CharTable (com.hankcs.hanlp.dictionary.other.CharTable): 1
Table (com.hankcs.hanlp.model.crf.Table): 1
State (com.hankcs.hanlp.seg.Dijkstra.Path.State): 1
EdgeFrom (com.hankcs.hanlp.seg.common.EdgeFrom): 1
List (java.util.List): 1
PriorityQueue (java.util.PriorityQueue): 1