Search in sources :

Example 1 with Pair

use of com.hankcs.hanlp.collection.dartsclone.Pair in project HanLP by hankcs.

The method convert2Pair of the class String2PinyinConverter.

/**
 * Converts mixed text into pinyin.
 *
 * @param complexText text mixing Chinese characters, pinyin and input-method heads,
 *                    e.g. “飞流zh下sqianch”
 * @param removeTone  not read anywhere in this method body
 * @return a pair whose key is the pinyin list and whose value is the type list, one entry
 *         per pinyin (true marks a pinyin, false marks an input-method head)
 */
public static Pair<List<Pinyin>, List<Boolean>> convert2Pair(String complexText, boolean removeTone) {
    List<Pinyin> pinyins = new LinkedList<Pinyin>();
    List<Boolean> isPinyinFlags = new LinkedList<Boolean>();
    for (Token token : trie.tokenize(complexText)) {
        String piece = token.getFragment();
        if (!token.isMatch()) {
            // Unmatched fragment: delegate to the dictionary; every resulting entry is flagged true.
            List<Pinyin> converted = PinyinDictionary.convertToPinyin(piece);
            pinyins.addAll(converted);
            int remaining = converted.size();
            while (remaining-- > 0) {
                isPinyinFlags.add(true);
            }
            continue;
        }
        // Matched fragment: a pinyin or a part of one, converted through the map.
        Pinyin single = convertSingle(piece);
        pinyins.add(single);
        // Flagged true only when the fragment is as long as the toneless pinyin.
        isPinyinFlags.add(piece.length() == single.getPinyinWithoutTone().length());
    }
    makeToneToTheSame(pinyins);
    return new Pair<List<Pinyin>, List<Boolean>>(pinyins, isPinyinFlags);
}
Also used : Token(com.hankcs.hanlp.algoritm.ahocorasick.trie.Token) Pair(com.hankcs.hanlp.collection.dartsclone.Pair)

Example 2 with Pair

use of com.hankcs.hanlp.collection.dartsclone.Pair in project HanLP by hankcs.

The method makeEdge of the class MaxEntDependencyParser.

/**
 * Builds the edge between two nodes by collecting string features around both endpoints,
 * passing them to {@code model.predict}, and keeping the highest-valued prediction whose
 * key is not {@code "null"}.
 *
 * @param nodeArray the nodes being parsed
 * @param from      index of the first endpoint in {@code nodeArray}
 * @param to        index of the second endpoint in {@code nodeArray}
 * @return an edge from {@code from} to {@code to} carrying the chosen key and the negative
 *         natural logarithm of its value as a float
 */
@Override
protected Edge makeEdge(Node[] nodeArray, int from, int to) {
    LinkedList<String> context = new LinkedList<String>();

    // Window of two nodes on each side of `from`; positions outside the array use Node.NULL.
    for (int offset = -2; offset <= 2; ++offset) {
        int pos = from + offset;
        Node node = Node.NULL;
        if (pos >= 0 && pos < nodeArray.length) {
            node = nodeArray[pos];
        }
        // Append a marker to the tail, otherwise the features would collide.
        context.add(node.compiledWord + "i" + offset);
        context.add(node.label + "i" + offset);
    }

    // Same window around `to`, marked with "j" instead of "i".
    for (int offset = -2; offset <= 2; ++offset) {
        int pos = to + offset;
        Node node = Node.NULL;
        if (pos >= 0 && pos < nodeArray.length) {
            node = nodeArray[pos];
        }
        // Append a marker to the tail, otherwise the features would collide.
        context.add(node.compiledWord + "j" + offset);
        context.add(node.label + "j" + offset);
    }

    Node head = nodeArray[from];
    Node tail = nodeArray[to];
    int distance = from - to;

    // Endpoint pair features, first plain and then suffixed with the signed index distance.
    String wordArc = head.compiledWord + '→' + tail.compiledWord;
    String labelArc = head.label + '→' + tail.label;
    context.add(wordArc);
    context.add(labelArc);
    context.add(wordArc + distance);
    context.add(labelArc + distance);

    // Features that also include the node immediately before each endpoint.
    Node beforeHead = Node.NULL;
    if (from - 1 >= 0) {
        beforeHead = nodeArray[from - 1];
    }
    Node beforeTail = Node.NULL;
    if (to - 1 >= 0) {
        beforeTail = nodeArray[to - 1];
    }
    context.add(beforeHead.compiledWord + '@' + wordArc);
    context.add(head.compiledWord + '→' + beforeTail.compiledWord + '@' + tail.compiledWord);
    context.add(beforeHead.label + '@' + labelArc);
    context.add(head.label + '→' + beforeTail.label + '@' + tail.label);

    String[] features = context.toArray(new String[0]);
    List<Pair<String, Double>> predictions = model.predict(features);

    // Sentinel used when no prediction qualifies.
    Pair<String, Double> best = new Pair<String, Double>("null", -1.0);
    for (Pair<String, Double> candidate : predictions) {
        boolean scoresHigher = candidate.getValue() > best.getValue();
        if (scoresHigher && !"null".equals(candidate.getKey())) {
            best = candidate;
        }
    }

    String chosenKey = best.getKey();
    float cost = (float) -Math.log(best.getValue());
    return new Edge(from, to, chosenKey, cost);
}
Also used : Node(com.hankcs.hanlp.dependency.common.Node) Edge(com.hankcs.hanlp.dependency.common.Edge) LinkedList(java.util.LinkedList) Pair(com.hankcs.hanlp.collection.dartsclone.Pair)

Aggregations

Pair (com.hankcs.hanlp.collection.dartsclone.Pair)2 Token (com.hankcs.hanlp.algoritm.ahocorasick.trie.Token)1 Edge (com.hankcs.hanlp.dependency.common.Edge)1 Node (com.hankcs.hanlp.dependency.common.Node)1 LinkedList (java.util.LinkedList)1