Use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
Class NShortSegment, method BiSegment.
/**
 * Coarse segmentation using a bigram language model over the N shortest paths.
 * <p>
 * NOTE(review): {@code sSentence} is not read inside this method body; the word
 * net appears to be generated from state already held by this segment instance —
 * confirm against the caller.
 *
 * @param sSentence      the sentence to segment (unused in this method body)
 * @param nKind          how many coarse results are wanted
 * @param wordNetOptimum optimized word net that merged words are inserted into
 * @param wordNetAll     full word net of the sentence
 * @return a list of coarse segmentation results
 * @throws RuntimeException if no shortest path can be found (e.g. the word net
 *                          contains a negative cycle or an isolated node)
 */
public List<List<Vertex>> BiSegment(char[] sSentence, int nKind, WordNet wordNetOptimum, WordNet wordNetAll) {
List<List<Vertex>> coarseResult = new LinkedList<List<Vertex>>();
//////////////// build the word net ////////////////////
GenerateWordNet(wordNetAll);
//////////////// build the word graph ////////////////////
Graph graph = GenerateBiGraph(wordNetAll);
if (HanLP.Config.DEBUG) {
System.out.printf("打印词图:%s\n", graph.printByTo());
}
//////////////// N shortest paths ////////////////////
NShortPath nShortPath = new NShortPath(graph, nKind);
// Request twice as many paths as results — presumably head-room for the
// merging step below; TODO confirm the rationale.
List<int[]> spResult = nShortPath.getNPaths(nKind * 2);
// Idiomatic emptiness check (was: spResult.size() == 0).
if (spResult.isEmpty()) {
throw new RuntimeException(nKind + "-最短路径求解失败,请检查上面的词网是否存在负圈或悬孤节点");
}
//////////////// merge dates and numbers along each path ////////////////////
for (int[] path : spResult) {
List<Vertex> vertexes = graph.parsePath(path);
GenerateWord(vertexes, wordNetOptimum);
coarseResult.add(vertexes);
}
return coarseResult;
}
Use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
Class AtomNode, method convert.
/**
 * Converts a raw atom word of the given character type into a {@link Vertex}.
 * Numeric and letter atoms are replaced by their dictionary placeholder tags
 * ("未##数" / "未##串") while the original surface form is kept as the real word.
 */
public static Vertex convert(String word, int type) {
// The surface form always survives as the vertex's real word.
String name = word;
// Frequency attribute is a flat 1 for every atom.
final int dValue = 1;
Nature nature;
if (type == Predefine.CT_INDEX || type == Predefine.CT_NUM) {
// Numbers (and index-like atoms) collapse to the unknown-number tag.
nature = Nature.m;
word = "未##数";
} else if (type == Predefine.CT_DELIMITER) {
nature = Nature.w;
} else if (type == Predefine.CT_LETTER || type == Predefine.CT_SINGLE) {
// Letters and single symbols collapse to the unknown-string tag.
nature = Nature.nx;
word = "未##串";
} else {
// CT_CHINESE and any unrecognized type keep the default noun nature.
nature = Nature.n;
}
return new Vertex(word, name, new CoreDictionary.Attribute(nature, dValue));
}
Use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
Class TranslatedPersonRecognition, method Recognition.
/**
 * Runs transliterated (foreign) person-name recognition over a coarse
 * segmentation result and inserts each recognized multi-part name into the
 * optimized word net.
 *
 * @param segResult      coarse segmentation result; the first element is a
 *                       sentence-start vertex and is skipped
 * @param wordNetOptimum word net corresponding to the coarse result; recognized
 *                       names are inserted here
 * @param wordNetAll     full word net of the sentence
 */
public static void Recognition(List<Vertex> segResult, WordNet wordNetOptimum, WordNet wordNetAll) {
// Accumulates the characters of the candidate name currently being built.
StringBuilder sbName = new StringBuilder();
// How many vertices have been appended to the current candidate (0 = idle).
int appendTimes = 0;
ListIterator<Vertex> listIterator = segResult.listIterator();
// Skip the sentence-start vertex.
listIterator.next();
// Character offset of the current vertex within the sentence (1-based).
int line = 1;
// Offset where the current candidate name began.
int activeLine = 1;
while (listIterator.hasNext()) {
Vertex vertex = listIterator.next();
if (appendTimes > 0) {
if (vertex.guessNature() == Nature.nrf || TranslatedPersonDictionary.containsKey(vertex.realWord)) {
// Still inside a candidate name: keep accumulating.
sbName.append(vertex.realWord);
++appendTimes;
} else {
// Recognition of the current candidate ends here.
// Only names built from more than one vertex are accepted.
if (appendTimes > 1) {
if (HanLP.Config.DEBUG) {
System.out.println("音译人名识别出:" + sbName.toString());
}
wordNetOptimum.insert(activeLine, new Vertex(Predefine.TAG_PEOPLE, sbName.toString(), new CoreDictionary.Attribute(Nature.nrf), WORD_ID), wordNetAll);
}
sbName.setLength(0);
appendTimes = 0;
}
} else {
// nrf and nsf natures trigger recognition of a new candidate.
if (vertex.guessNature() == Nature.nrf || vertex.getNature() == Nature.nsf) // || TranslatedPersonDictionary.containsKey(vertex.realWord)
{
sbName.append(vertex.realWord);
++appendTimes;
activeLine = line;
}
}
// NOTE(review): a candidate still pending when the list ends is silently
// dropped — presumably a sentinel tail vertex always terminates it; verify.
line += vertex.realWord.length();
}
}
Use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
Class PlaceRecognition, method insert.
/**
 * Appends the head vertex of the given word-net row to the segmentation
 * sequence, tagging it with the supplied place-name role at a fixed weight.
 */
private static void insert(ListIterator<Vertex> listIterator, List<EnumItem<NS>> tagList, WordNet wordNetAll, int line, NS ns) {
final Vertex head = wordNetAll.getFirst(line);
assert head != null : "全词网居然有空白行!";
tagList.add(new EnumItem<NS>(ns, 1000));
listIterator.add(head);
}
Use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
Class OrganizationRecognition, method Recognition.
/**
 * Runs organization-name recognition: tags each coarse-segmented word with an
 * NT role, decodes the role sequence with Viterbi, then matches the decoded
 * pattern against the organization dictionary to populate the optimized net.
 * Always returns {@code true}.
 */
public static boolean Recognition(List<Vertex> pWordSegResult, WordNet wordNetOptimum, WordNet wordNetAll) {
List<EnumItem<NT>> roleTagList = roleTag(pWordSegResult, wordNetAll);
if (HanLP.Config.DEBUG) {
// Dump the raw role observations, one bracketed [word tag] pair per word.
StringBuilder observation = new StringBuilder();
Iterator<Vertex> wordIterator = pWordSegResult.iterator();
for (EnumItem<NT> roleItem : roleTagList) {
observation.append('[').append(wordIterator.next().realWord).append(' ').append(roleItem).append(']');
}
System.out.printf("机构名角色观察:%s\n", observation.toString());
}
List<NT> NTList = viterbiExCompute(roleTagList);
if (HanLP.Config.DEBUG) {
// Dump the Viterbi-decoded tagging as [word/tag ,word/tag ...].
StringBuilder tagging = new StringBuilder("[");
Iterator<Vertex> wordIterator = pWordSegResult.iterator();
for (NT decoded : NTList) {
tagging.append(wordIterator.next().realWord).append('/').append(decoded).append(" ,");
}
// Trim the trailing " ," separator, if any entries were written.
if (tagging.length() > 1)
tagging.delete(tagging.length() - 2, tagging.length());
tagging.append(']');
System.out.printf("机构名角色标注:%s\n", tagging.toString());
}
OrganizationDictionary.parsePattern(NTList, pWordSegResult, wordNetOptimum, wordNetAll);
return true;
}
Aggregations