Search in sources :

Example 1 with Token

use of com.hankcs.hanlp.algoritm.ahocorasick.trie.Token in project HanLP by hankcs.

the class TonePinyinString2PinyinConverter method convert.

/**
 * Converts a tone-marked pinyin string into a list of {@link Pinyin} values.
 *
 * <p>The text is split with {@code trie.tokenize}; each token's fragment is then looked up
 * in {@code mapKey}, and the lookup results are collected in token order.
 *
 * @param tonePinyinText the text to tokenize and convert
 * @param removeNull     when {@code true}, fragments whose lookup yields {@code null} are
 *                       skipped; when {@code false}, a {@code null} element is added for them
 * @return the lookup results, one per retained token
 */
public static List<Pinyin> convert(String tonePinyinText, boolean removeNull) {
    List<Pinyin> result = new LinkedList<Pinyin>();
    for (Token piece : trie.tokenize(tonePinyinText)) {
        Pinyin mapped = mapKey.get(piece.getFragment());
        // Keep the entry unless it is null and the caller asked for nulls to be dropped.
        if (mapped != null || !removeNull) {
            result.add(mapped);
        }
    }
    return result;
}
Also used : Token(com.hankcs.hanlp.algoritm.ahocorasick.trie.Token)

Example 2 with Token

use of com.hankcs.hanlp.algoritm.ahocorasick.trie.Token in project HanLP by hankcs.

the class String2PinyinConverter method convert2Pair.

/**
 * Converts mixed text to pinyin and records, for every resulting entry, what kind it is.
 *
 * @param complexText text mixing Chinese characters, pinyin and input-method heads,
 *                    e.g. “飞流zh下sqianch”
 * @param removeTone  NOTE(review): this flag is not read anywhere in the method body;
 *                    {@code makeToneToTheSame} is applied to the result regardless of its
 *                    value. Confirm whether that is intended.
 * @return a pair whose key is the pinyin list and whose value is the type of each entry
 *         ({@code true} means the entry is a pinyin, {@code false} means it is an
 *         input-method head)
 */
public static Pair<List<Pinyin>, List<Boolean>> convert2Pair(String complexText, boolean removeTone) {
    List<Pinyin> pinyins = new LinkedList<Pinyin>();
    List<Boolean> flags = new LinkedList<Boolean>();
    for (Token piece : trie.tokenize(complexText)) {
        String text = piece.getFragment();
        if (!piece.isMatch()) {
            // Not matched by the trie: hand the fragment to the dictionary and mark
            // every pinyin it returns as a pinyin entry.
            List<Pinyin> converted = PinyinDictionary.convertToPinyin(text);
            pinyins.addAll(converted);
            for (int remaining = converted.size(); remaining > 0; --remaining) {
                flags.add(true);
            }
            continue;
        }
        // Matched: the fragment is a pinyin or part of one, so convert it on its own.
        Pinyin single = convertSingle(text);
        pinyins.add(single);
        // The entry counts as a pinyin only when the fragment is as long as the
        // toneless pinyin; a different length marks it as an input-method head.
        flags.add(text.length() == single.getPinyinWithoutTone().length());
    }
    makeToneToTheSame(pinyins);
    return new Pair<List<Pinyin>, List<Boolean>>(pinyins, flags);
}
Also used : Token(com.hankcs.hanlp.algoritm.ahocorasick.trie.Token) Pair(com.hankcs.hanlp.collection.dartsclone.Pair)

Example 3 with Token

use of com.hankcs.hanlp.algoritm.ahocorasick.trie.Token in project HanLP by hankcs.

the class String2PinyinConverter method convert.

/**
 * Converts text to pinyin.
 *
 * <p>The text is split with {@code trie.tokenize}. Matched tokens are converted one at a
 * time through {@code convertSingle}; unmatched fragments are passed to
 * {@code PinyinDictionary.convertToPinyin}. Results are appended in token order.
 *
 * @param complexText the text to convert
 * @return the resulting pinyin list
 */
public static List<Pinyin> convert(String complexText) {
    List<Pinyin> result = new LinkedList<Pinyin>();
    for (Token piece : trie.tokenize(complexText)) {
        String text = piece.getFragment();
        if (!piece.isMatch()) {
            // Not matched by the trie: let the dictionary convert the whole fragment.
            result.addAll(PinyinDictionary.convertToPinyin(text));
            continue;
        }
        // Matched: the fragment is a pinyin or part of one, so convert it on its own.
        result.add(convertSingle(text));
    }
    return result;
}
Also used : Token(com.hankcs.hanlp.algoritm.ahocorasick.trie.Token)

Aggregations

Token (com.hankcs.hanlp.algoritm.ahocorasick.trie.Token)3 Pair (com.hankcs.hanlp.collection.dartsclone.Pair)1