Use of com.hankcs.hanlp.collection.dartsclone.Pair in project HanLP by hankcs.
From the class String2PinyinConverter, method convert2Pair.
/**
 * Converts mixed text into a pinyin sequence.
 *
 * @param complexText text mixing Chinese characters, pinyin and input-method heads
 *                    (abbreviated pinyin), e.g. “飞流zh下sqianch”
 * @param removeTone  when {@code true}, the resulting pinyin list is passed through
 *                    {@code makeToneToTheSame} before being returned; when {@code false},
 *                    the list is returned as produced by the conversion
 * @return a pair whose key is the pinyin list and whose value is a parallel list of flags
 *         ({@code true}: the entry is a pinyin, {@code false}: the entry is an input-method head)
 */
public static Pair<List<Pinyin>, List<Boolean>> convert2Pair(String complexText, boolean removeTone) {
    List<Pinyin> pinyinList = new LinkedList<Pinyin>();
    List<Boolean> booleanList = new LinkedList<Boolean>();
    Collection<Token> tokenize = trie.tokenize(complexText);
    for (Token token : tokenize) {
        String fragment = token.getFragment();
        if (token.isMatch()) {
            // The fragment is a pinyin or a prefix of one; resolve it through the single-pinyin lookup.
            Pinyin pinyin = convertSingle(fragment);
            pinyinList.add(pinyin);
            // Same length as the toneless spelling => a complete pinyin; otherwise only a head.
            booleanList.add(fragment.length() == pinyin.getPinyinWithoutTone().length());
        } else {
            // Unmatched text goes through the dictionary; every pinyin it yields is flagged as complete.
            List<Pinyin> pinyinListFragment = PinyinDictionary.convertToPinyin(fragment);
            pinyinList.addAll(pinyinListFragment);
            for (int i = 0; i < pinyinListFragment.size(); ++i) {
                booleanList.add(true);
            }
        }
    }
    // Honour the caller's flag: previously the tones were unified even when removeTone was false.
    if (removeTone) {
        makeToneToTheSame(pinyinList);
    }
    return new Pair<List<Pinyin>, List<Boolean>>(pinyinList, booleanList);
}
Use of com.hankcs.hanlp.collection.dartsclone.Pair in project HanLP by hankcs.
From the class MaxEntDependencyParser, method makeEdge.
/**
 * Builds the feature context for a candidate arc {@code from -> to}, asks {@code model} to
 * score the possible labels, and returns an edge carrying the best-scoring label other than
 * {@code "null"}.
 *
 * <p>The edge cost is {@code -log(score)} of the chosen label. If no label other than
 * {@code "null"} beats the initial sentinel score of {@code -1.0}, the sentinel itself is used,
 * so the label is {@code "null"} and the cost is NaN (logarithm of a negative number).
 *
 * @param nodeArray the nodes of the sentence
 * @param from      index of the first endpoint in {@code nodeArray}
 * @param to        index of the second endpoint in {@code nodeArray}
 * @return the edge between {@code from} and {@code to} with its selected label and cost
 */
@Override
protected Edge makeEdge(Node[] nodeArray, int from, int to) {
    List<String> features = new LinkedList<String>();

    // Window of two nodes on either side of `from`; positions outside the array use Node.NULL.
    // The "i" + offset suffix keeps features from different positions distinct.
    for (int offset = -2; offset <= 2; ++offset) {
        int position = from + offset;
        Node node = Node.NULL;
        if (position >= 0 && position < nodeArray.length) {
            node = nodeArray[position];
        }
        features.add(node.compiledWord + "i" + offset);
        features.add(node.label + "i" + offset);
    }

    // Same window around `to`, tagged with "j" instead.
    for (int offset = -2; offset <= 2; ++offset) {
        int position = to + offset;
        Node node = Node.NULL;
        if (position >= 0 && position < nodeArray.length) {
            node = nodeArray[position];
        }
        features.add(node.compiledWord + "j" + offset);
        features.add(node.label + "j" + offset);
    }

    Node head = nodeArray[from];
    Node dependent = nodeArray[to];
    int distance = from - to;

    // Pairwise features, without and with the signed distance between the two endpoints.
    features.add(head.compiledWord + '→' + dependent.compiledWord);
    features.add(head.label + '→' + dependent.label);
    features.add(head.compiledWord + '→' + dependent.compiledWord + distance);
    features.add(head.label + '→' + dependent.label + distance);

    // Pairwise features extended with the node immediately preceding each endpoint.
    Node beforeHead = from > 0 ? nodeArray[from - 1] : Node.NULL;
    Node beforeDependent = to > 0 ? nodeArray[to - 1] : Node.NULL;
    features.add(beforeHead.compiledWord + '@' + head.compiledWord + '→' + dependent.compiledWord);
    features.add(head.compiledWord + '→' + beforeDependent.compiledWord + '@' + dependent.compiledWord);
    features.add(beforeHead.label + '@' + head.label + '→' + dependent.label);
    features.add(head.label + '→' + beforeDependent.label + '@' + dependent.label);

    String[] featureArray = features.toArray(new String[0]);
    List<Pair<String, Double>> scoredLabels = model.predict(featureArray);

    // Start from a sentinel that any real candidate with a score above -1.0 replaces.
    Pair<String, Double> best = new Pair<String, Double>("null", -1.0);
    for (Pair<String, Double> candidate : scoredLabels) {
        boolean scoresHigher = candidate.getValue() > best.getValue();
        if (scoresHigher && !"null".equals(candidate.getKey())) {
            best = candidate;
        }
    }

    double cost = -Math.log(best.getValue());
    return new Edge(from, to, best.getKey(), (float) cost);
}
Aggregations