Search in sources:

Example 1 with Node

Use of com.hankcs.hanlp.dependency.common.Node in the HanLP project by hankcs.

The method makeEdge of the class MaxEntDependencyParser.

/**
 * Builds the scored edge for a candidate dependency arc {@code from -> to}.
 * <p>
 * Context features are gathered around both endpoints, fed to the maximum-entropy
 * model, and the best non-"null" label is kept; the edge cost is the negative
 * log-probability of that label.
 *
 * @param nodeArray all nodes of the sentence (index 0 is presumably the virtual root)
 * @param from      index of the dependent node
 * @param to        index of the head node
 * @return a new {@link Edge} carrying the winning label and its -log(probability) cost
 */
@Override
protected Edge makeEdge(Node[] nodeArray, int from, int to) {
    LinkedList<String> context = new LinkedList<String>();
    // Unigram window features around each endpoint. The "i"/"j" tag together with
    // the relative offset is appended so features from the two windows never collide.
    addWindowFeatures(context, nodeArray, from, "i");
    addWindowFeatures(context, nodeArray, to, "j");
    Node head = nodeArray[to];
    Node dependent = nodeArray[from];
    // Endpoint-pair features, with and without the signed distance between them.
    context.add(dependent.compiledWord + '→' + head.compiledWord);
    context.add(dependent.label + '→' + head.label);
    context.add(dependent.compiledWord + '→' + head.compiledWord + (from - to));
    context.add(dependent.label + '→' + head.label + (from - to));
    // Features combining each endpoint with the token immediately preceding it.
    Node beforeFrom = from - 1 >= 0 ? nodeArray[from - 1] : Node.NULL;
    Node beforeTo = to - 1 >= 0 ? nodeArray[to - 1] : Node.NULL;
    context.add(beforeFrom.compiledWord + '@' + dependent.compiledWord + '→' + head.compiledWord);
    context.add(dependent.compiledWord + '→' + beforeTo.compiledWord + '@' + head.compiledWord);
    context.add(beforeFrom.label + '@' + dependent.label + '→' + head.label);
    context.add(dependent.label + '→' + beforeTo.label + '@' + head.label);
    // Query the model and keep the highest-scoring prediction whose label is not "null".
    List<Pair<String, Double>> predictions = model.predict(context.toArray(new String[0]));
    Pair<String, Double> best = new Pair<String, Double>("null", -1.0);
    for (Pair<String, Double> candidate : predictions) {
        if (candidate.getValue() > best.getValue() && !"null".equals(candidate.getKey())) {
            best = candidate;
        }
    }
    // NOTE(review): if every prediction is "null", best keeps the -1.0 sentinel and
    // Math.log(-1.0) yields NaN — same as the original behavior; verify upstream.
    return new Edge(from, to, best.getKey(), (float) -Math.log(best.getValue()));
}

/**
 * Adds word/label features for the 5-token window centered on {@code center}.
 * Out-of-range positions fall back to {@link Node#NULL}; {@code tag} plus the
 * relative offset disambiguates otherwise identical feature strings.
 */
private static void addWindowFeatures(List<String> context, Node[] nodeArray, int center, String tag) {
    for (int i = center - 2; i <= center + 2; ++i) {
        Node w = (i >= 0 && i < nodeArray.length) ? nodeArray[i] : Node.NULL;
        context.add(w.compiledWord + tag + (i - center));
        context.add(w.label + tag + (i - center));
    }
}
Also used : Node(com.hankcs.hanlp.dependency.common.Node) Edge(com.hankcs.hanlp.dependency.common.Edge) LinkedList(java.util.LinkedList) Pair(com.hankcs.hanlp.collection.dartsclone.Pair)

Example 2 with Node

Use of com.hankcs.hanlp.dependency.common.Node in the HanLP project by hankcs.

The method parse of the class MinimumSpanningTreeParser.

/**
 * Parses a segmented sentence into a dependency tree via a Prim-style
 * minimum spanning tree over model-scored edges.
 * <p>
 * NOTE(review): this method mutates the caller's {@code termList} by inserting
 * a virtual-root term at index 0 — callers should not reuse the list assuming
 * it is unchanged.
 *
 * @param termList segmented words; null or empty input yields null
 * @return the parsed sentence, or null if input is empty or no edge leaves the root
 */
@Override
public CoNLLSentence parse(List<Term> termList) {
    if (termList == null || termList.size() == 0)
        return null;
    // Prepend the virtual root node ("##核心##") so real words occupy indices 1..n.
    termList.add(0, new Term("##核心##", Nature.begin));
    Node[] nodeArray = new Node[termList.size()];
    Iterator<Term> iterator = termList.iterator();
    for (int i = 0; i < nodeArray.length; ++i) {
        nodeArray[i] = new Node(iterator.next(), i);
    }
    // Score every ordered pair. Stored transposed: edges[j][i] holds the edge
    // created as makeEdge(nodeArray, i, j), so row edges[v] collects the arcs
    // whose 'to' endpoint is v — presumably "all arcs headed by v"; confirm
    // against Edge's from/to semantics.
    Edge[][] edges = new Edge[nodeArray.length][nodeArray.length];
    for (int i = 0; i < edges.length; ++i) {
        for (int j = 0; j < edges[i].length; ++j) {
            if (i != j) {
                edges[j][i] = makeEdge(nodeArray, i, j);
            }
        }
    }
    // Prim's minimum spanning tree algorithm.
    // NOTE(review): max_v = n*(n-1) over-allocates; only indices < n are ever used.
    int max_v = nodeArray.length * (nodeArray.length - 1);
    float[] mincost = new float[max_v];
    Arrays.fill(mincost, Float.MAX_VALUE / 3);
    boolean[] used = new boolean[max_v];
    Arrays.fill(used, false);
    used[0] = true;
    PriorityQueue<State> que = new PriorityQueue<State>();
    // Find the single cheapest child of the virtual root (the sentence head).
    float minCostToRoot = Float.MAX_VALUE;
    Edge firstEdge = null;
    Edge[] edgeResult = new Edge[termList.size() - 1];
    for (Edge edge : edges[0]) {
        if (edge == null)
            continue;
        if (minCostToRoot > edge.cost) {
            firstEdge = edge;
            minCostToRoot = edge.cost;
        }
    }
    if (firstEdge == null)
        return null;
    que.add(new State(minCostToRoot, firstEdge.from, firstEdge));
    // Grow the tree: repeatedly attach the cheapest reachable, not-yet-used node.
    while (!que.isEmpty()) {
        State p = que.poll();
        int v = p.id;
        // Skip stale queue entries (node already attached, or a cheaper path known).
        if (used[v] || p.cost > mincost[v])
            continue;
        used[v] = true;
        if (p.edge != null) {
            //                System.out.println(p.edge.from + " " + p.edge.to + p.edge.label);
            // Record the chosen arc, indexed by the dependent's word position (from - 1).
            edgeResult[p.edge.from - 1] = p.edge;
        }
        // Relax all arcs leaving the newly attached node v.
        for (Edge e : edges[v]) {
            if (e == null)
                continue;
            if (mincost[e.from] > e.cost) {
                mincost[e.from] = e.cost;
                que.add(new State(mincost[e.from], e.from, e));
            }
        }
    }
    // Convert node/edge results into CoNLL words (skipping the virtual root at index 0).
    // NOTE(review): if any node was never reached, edgeResult[i] is null and the
    // .label access below throws NPE — TODO confirm reachability is guaranteed.
    CoNLLWord[] wordArray = new CoNLLWord[termList.size() - 1];
    for (int i = 0; i < wordArray.length; ++i) {
        wordArray[i] = new CoNLLWord(i + 1, nodeArray[i + 1].word, nodeArray[i + 1].label);
        wordArray[i].DEPREL = edgeResult[i].label;
    }
    // Wire up heads: edge.to == 0 means the virtual root, i.e. CoNLLWord.ROOT.
    for (int i = 0; i < edgeResult.length; ++i) {
        int index = edgeResult[i].to - 1;
        if (index < 0) {
            wordArray[i].HEAD = CoNLLWord.ROOT;
            continue;
        }
        wordArray[i].HEAD = wordArray[index];
    }
    return new CoNLLSentence(wordArray);
}
Also used : Node(com.hankcs.hanlp.dependency.common.Node) Term(com.hankcs.hanlp.seg.common.Term) PriorityQueue(java.util.PriorityQueue) State(com.hankcs.hanlp.dependency.common.State) CoNLLWord(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord) CoNLLSentence(com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence) Edge(com.hankcs.hanlp.dependency.common.Edge)

Aggregations

Edge (com.hankcs.hanlp.dependency.common.Edge)2 Node (com.hankcs.hanlp.dependency.common.Node)2 Pair (com.hankcs.hanlp.collection.dartsclone.Pair)1 CoNLLSentence (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLSentence)1 CoNLLWord (com.hankcs.hanlp.corpus.dependency.CoNll.CoNLLWord)1 State (com.hankcs.hanlp.dependency.common.State)1 Term (com.hankcs.hanlp.seg.common.Term)1 LinkedList (java.util.LinkedList)1 PriorityQueue (java.util.PriorityQueue)1