use of com.hankcs.hanlp.algoritm.ahocorasick.trie.Token in project HanLP by hankcs.
the class TonePinyinString2PinyinConverter method convert.
/**
 * Converts pinyin text into a list of {@link Pinyin} values.
 * <p>
 * The text is split by {@code trie.tokenize}, and each token's fragment is looked up in
 * {@code mapKey}.
 *
 * @param tonePinyinText the pinyin text to convert
 * @param removeNull     when {@code true}, fragments for which {@code mapKey} yields
 *                       {@code null} are skipped; when {@code false}, the {@code null}
 *                       is added to the result as-is
 * @return the looked-up values, in the iteration order of the tokens
 */
public static List<Pinyin> convert(String tonePinyinText, boolean removeNull) {
    List<Pinyin> result = new LinkedList<Pinyin>();
    for (Token piece : trie.tokenize(tonePinyinText)) {
        Pinyin mapped = mapKey.get(piece.getFragment());
        // Keep the entry unless it is null and the caller asked for nulls to be dropped.
        if (mapped != null || !removeNull) {
            result.add(mapped);
        }
    }
    return result;
}
use of com.hankcs.hanlp.algoritm.ahocorasick.trie.Token in project HanLP by hankcs.
the class String2PinyinConverter method convert2Pair.
/**
 * Converts mixed text into pinyin.
 *
 * @param complexText text mixing Chinese characters, pinyin and input-method initials,
 *                    e.g. "飞流zh下sqianch"
 * @param removeTone  not read by this method body; {@code makeToneToTheSame} is applied
 *                    to the result regardless of this flag
 * @return a pair whose key is the pinyin list and whose value is a parallel list of flags
 *         ({@code true} means the entry is a pinyin, {@code false} means it is an
 *         input-method initial)
 */
public static Pair<List<Pinyin>, List<Boolean>> convert2Pair(String complexText, boolean removeTone) {
    List<Pinyin> pinyins = new LinkedList<Pinyin>();
    List<Boolean> flags = new LinkedList<Boolean>();
    for (Token piece : trie.tokenize(complexText)) {
        String text = piece.getFragment();
        if (!piece.isMatch()) {
            // Not matched by the trie: delegate to the dictionary and flag every
            // resulting entry as true.
            List<Pinyin> converted = PinyinDictionary.convertToPinyin(text);
            pinyins.addAll(converted);
            for (Pinyin ignored : converted) {
                flags.add(Boolean.TRUE);
            }
            continue;
        }
        // Matched: a pinyin or part of one, converted via convertSingle.
        Pinyin single = convertSingle(text);
        pinyins.add(single);
        // The flag is true only when the fragment is as long as the toneless pinyin.
        flags.add(text.length() == single.getPinyinWithoutTone().length());
    }
    makeToneToTheSame(pinyins);
    return new Pair<List<Pinyin>, List<Boolean>>(pinyins, flags);
}
use of com.hankcs.hanlp.algoritm.ahocorasick.trie.Token in project HanLP by hankcs.
the class String2PinyinConverter method convert.
/**
 * Converts text to pinyin.
 * <p>
 * The text is split by {@code trie.tokenize}. Matched fragments go through
 * {@code convertSingle}; all other fragments are handed to
 * {@code PinyinDictionary.convertToPinyin}.
 *
 * @param complexText the text to convert
 * @return the resulting pinyin values, in the iteration order of the tokens
 */
public static List<Pinyin> convert(String complexText) {
    List<Pinyin> result = new LinkedList<Pinyin>();
    for (Token piece : trie.tokenize(complexText)) {
        String text = piece.getFragment();
        if (!piece.isMatch()) {
            result.addAll(PinyinDictionary.convertToPinyin(text));
            continue;
        }
        // Matched: a pinyin or part of one, converted via convertSingle.
        result.add(convertSingle(text));
    }
    return result;
}