Use of com.hankcs.hanlp.seg.common.Graph in project HanLP by hankcs.
The class NShortSegment, method BiOptimumSegment.
List<Vertex> BiOptimumSegment(WordNet wordNetOptimum) {
    // logger.trace("细分词网:\n{}", wordNetOptimum); // fine-segmentation word net
    // Build a bigram word graph from the refined word net
    Graph graph = GenerateBiGraph(wordNetOptimum);
    if (HanLP.Config.DEBUG) {
        System.out.printf("细分词图:%s\n", graph.printByTo()); // print the fine-segmentation word graph
    }
    // Solve for the single shortest path over the bigram graph
    NShortPath nShortPath = new NShortPath(graph, 1);
    List<int[]> spResult = nShortPath.getNPaths(1);
    // Assertion message: "shortest-path solving failed; check the graph below for isolated vertices or negative cycles"
    assert spResult.size() > 0 : "最短路径求解失败,请检查下图是否有悬孤节点或负圈\n" + graph.printByTo();
    return graph.parsePath(spResult.get(0));
}
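BiOptimumSegment is an internal step; application code normally reaches it only through the public Segment API. A minimal usage sketch, assuming HanLP 1.x package paths and that Segment.seg(String) returns List<Term> (the demo class name and sample sentence are illustrative, not part of the snippet above):

import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import java.util.List;

public class NShortSegmentDemo {
    public static void main(String[] args) {
        // seg() drives segSentence(), which builds the word net, runs NER,
        // and refines the result with the shortest-path steps shown above
        Segment segment = new NShortSegment();
        List<Term> termList = segment.seg("商品和服务");
        System.out.println(termList);
    }
}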
Use of com.hankcs.hanlp.seg.common.Graph in project HanLP by hankcs.
The class NShortSegment, method segSentence.
@Override
public List<Term> segSentence(char[] sentence) {
    WordNet wordNetOptimum = new WordNet(sentence);
    WordNet wordNetAll = new WordNet(sentence);
    // char[] charArray = text.toCharArray();
    // Coarse segmentation: keep the 2 best N-shortest-path candidates
    List<List<Vertex>> coarseResult = BiSegment(sentence, 2, wordNetOptimum, wordNetAll);
    boolean NERexists = false;
    for (List<Vertex> vertexList : coarseResult) {
        if (HanLP.Config.DEBUG) {
            System.out.println("粗分结果" + convert(vertexList, false)); // print the coarse segmentation result
        }
        // Named entity recognition on each coarse candidate
        if (config.ner) {
            wordNetOptimum.addAll(vertexList);
            int preSize = wordNetOptimum.size();
            if (config.nameRecognize) {
                PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.translatedNameRecognize) {
                TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.japaneseNameRecognize) {
                JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.placeRecognize) {
                PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.organizationRecognize) {
                // Cascaded hidden Markov model: this level's output feeds the next-level HMM as input
                vertexList = Dijkstra.compute(GenerateBiGraph(wordNetOptimum));
                wordNetOptimum.addAll(vertexList);
                OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (!NERexists && preSize != wordNetOptimum.size()) {
                NERexists = true;
            }
        }
    }
    List<Vertex> vertexList = coarseResult.get(0);
    if (NERexists) {
        // Fine segmentation: rerun the shortest path over the NER-enriched word net
        Graph graph = GenerateBiGraph(wordNetOptimum);
        vertexList = Dijkstra.compute(graph);
        if (HanLP.Config.DEBUG) {
            System.out.printf("细分词网:\n%s\n", wordNetOptimum); // fine-segmentation word net
            System.out.printf("细分词图:%s\n", graph.printByTo()); // fine-segmentation word graph
        }
    }
    // Number and quantifier recognition
    if (config.numberQuantifierRecognize) {
        mergeNumberQuantifier(vertexList, wordNetAll, config);
    }
    // In index mode, expand the result into the full (all-possible-words) segmentation
    if (config.indexMode) {
        return decorateResultForIndexMode(vertexList, wordNetAll);
    }
    // Part-of-speech tagging, if enabled
    if (config.speechTagging) {
        speechTagging(vertexList);
    }
    if (config.useCustomDictionary) {
        if (config.indexMode)
            combineByCustomDictionary(vertexList, wordNetAll);
        else
            combineByCustomDictionary(vertexList);
    }
    return convert(vertexList, config.offset);
}
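Each branch in segSentence is gated by a config flag; on the public API these flags are set through builder-style enable* methods on Segment. A sketch of switching the corresponding passes on, reusing the imports from the previous sketch and assuming these toggle names exist unchanged in your HanLP version:

Segment segment = new NShortSegment()
        .enableNameRecognize(true)               // config.nameRecognize
        .enableTranslatedNameRecognize(true)     // config.translatedNameRecognize
        .enableJapaneseNameRecognize(true)       // config.japaneseNameRecognize
        .enablePlaceRecognize(true)              // config.placeRecognize
        .enableOrganizationRecognize(true)       // config.organizationRecognize
        .enableNumberQuantifierRecognize(true)   // config.numberQuantifierRecognize
        .enablePartOfSpeechTagging(true)         // config.speechTagging
        .enableCustomDictionary(true)            // config.useCustomDictionary
        .enableOffset(true);                     // config.offset
System.out.println(segment.seg("商品和服务"));

With any of the NER toggles on, config.ner is true, so the fine segmentation pass (the Dijkstra.compute call above) runs whenever recognition actually added vertices to wordNetOptimum.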
Use of com.hankcs.hanlp.seg.common.Graph in project HanLP by hankcs.
The class NShortSegment, method BiSegment.
/**
 * Word segmentation with a bigram language model
 * @param sSentence the sentence to segment
 * @param nKind how many results are wanted
 * @param wordNetOptimum word net that collects candidate words for the later fine segmentation
 * @param wordNetAll word net holding every possible word over the sentence
 * @return a list of coarse segmentation results
 */
public List<List<Vertex>> BiSegment(char[] sSentence, int nKind, WordNet wordNetOptimum, WordNet wordNetAll) {
    List<List<Vertex>> coarseResult = new LinkedList<List<Vertex>>();
    //////////////// Generate the word net ////////////////
    GenerateWordNet(wordNetAll);
    // logger.trace("词网大小:" + wordNetAll.size()); // word net size
    // logger.trace("打印词网:\n" + wordNetAll); // print the word net
    /////////////// Generate the bigram word graph ////////////////
    Graph graph = GenerateBiGraph(wordNetAll);
    // logger.trace(graph.toString());
    if (HanLP.Config.DEBUG) {
        System.out.printf("打印词图:%s\n", graph.printByTo()); // print the word graph
    }
    /////////////// N-shortest paths ////////////////
    NShortPath nShortPath = new NShortPath(graph, nKind);
    List<int[]> spResult = nShortPath.getNPaths(nKind * 2);
    if (spResult.size() == 0) {
        // Message: "<nKind>-shortest-path solving failed; check the word net above for negative cycles or isolated vertices"
        throw new RuntimeException(nKind + "-最短路径求解失败,请检查上面的词网是否存在负圈或悬孤节点");
    }
    ////////////// Date and number merging strategy //////////////
    for (int[] path : spResult) {
        List<Vertex> vertexes = graph.parsePath(path);
        GenerateWord(vertexes, wordNetOptimum);
        coarseResult.add(vertexes);
    }
    return coarseResult;
}
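To see the effect of the N-shortest coarse stage end to end, HanLP's demos typically run NShortSegment side by side with the single-shortest-path DijkstraSegment. A hedged sketch, assuming com.hankcs.hanlp.seg.Dijkstra.DijkstraSegment is available and shares the same enable* toggles (sample sentence illustrative):

Segment nShortSegment = new NShortSegment()
        .enableCustomDictionary(false)
        .enablePlaceRecognize(true)
        .enableOrganizationRecognize(true);
Segment shortestSegment = new DijkstraSegment()
        .enableCustomDictionary(false)
        .enablePlaceRecognize(true)
        .enableOrganizationRecognize(true);
String sentence = "今天,刘志军案的关键人物,山西女商人丁书苗在市二中院出庭受审。";
// BiSegment keeps nKind coarse candidates (solved from nKind * 2 paths) before NER refinement,
// whereas DijkstraSegment commits to the single shortest path up front
System.out.println("N-shortest:    " + nShortSegment.seg(sentence));
System.out.println("Shortest path: " + shortestSegment.seg(sentence));

The two usually agree on unambiguous text; they tend to differ where keeping extra coarse candidates gives the entity recognizers more material to work with.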