Usage of com.hankcs.hanlp.seg.common.WordNet in the HanLP project by hankcs:
the segSentence method of the ViterbiSegment class.
@Override
protected List<Term> segSentence(char[] sentence) {
    // Build the full word lattice (one node per candidate word) over the input.
    WordNet lattice = new WordNet(sentence);
    GenerateWordNet(lattice);
    if (HanLP.Config.DEBUG) {
        System.out.printf("粗分词网:\n%s\n", lattice);
    }
    // Coarse segmentation: best path through the lattice via Viterbi decoding.
    List<Vertex> path = viterbi(lattice);
    // Merge adjacent vertices that form entries of the user's custom dictionary.
    if (config.useCustomDictionary) {
        if (config.indexMode)
            combineByCustomDictionary(path, lattice);
        else
            combineByCustomDictionary(path);
    }
    if (HanLP.Config.DEBUG) {
        System.out.println("粗分结果" + convert(path, false));
    }
    // Fuse number + quantifier sequences into single terms.
    if (config.numberQuantifierRecognize) {
        mergeNumberQuantifier(path, lattice, config);
    }
    // Named-entity recognition, run over a refined lattice seeded with the coarse path.
    if (config.ner) {
        WordNet refinedLattice = new WordNet(sentence, path);
        int sizeBeforeNer = refinedLattice.size();
        if (config.nameRecognize) {
            PersonRecognition.Recognition(path, refinedLattice, lattice);
        }
        if (config.translatedNameRecognize) {
            TranslatedPersonRecognition.Recognition(path, refinedLattice, lattice);
        }
        if (config.japaneseNameRecognize) {
            JapanesePersonRecognition.Recognition(path, refinedLattice, lattice);
        }
        if (config.placeRecognize) {
            PlaceRecognition.Recognition(path, refinedLattice, lattice);
        }
        if (config.organizationRecognize) {
            // Cascaded HMM: re-decode first so this level's output becomes
            // the next level's input, then rebuild the lattice from it.
            path = viterbi(refinedLattice);
            refinedLattice.clear();
            refinedLattice.addAll(path);
            sizeBeforeNer = refinedLattice.size();
            OrganizationRecognition.Recognition(path, refinedLattice, lattice);
        }
        // Only re-decode if some recognizer actually added vertices.
        if (refinedLattice.size() != sizeBeforeNer) {
            path = viterbi(refinedLattice);
            if (HanLP.Config.DEBUG) {
                System.out.printf("细分词网:\n%s\n", refinedLattice);
            }
        }
    }
    // Index mode: return the exhaustive (overlapping) segmentation instead.
    if (config.indexMode) {
        return decorateResultForIndexMode(path, lattice);
    }
    // Optional part-of-speech tagging.
    if (config.speechTagging) {
        speechTagging(path);
    }
    return convert(path, config.offset);
}
Usage of com.hankcs.hanlp.seg.common.WordNet in the HanLP project by hankcs:
the segSentence method of the NShortSegment class.
@Override
public List<Term> segSentence(char[] sentence) {
    WordNet refinedLattice = new WordNet(sentence);
    WordNet fullLattice = new WordNet(sentence);
    // Coarse stage: keep the 2 best segmentations (N-shortest paths).
    List<List<Vertex>> nBestPaths = BiSegment(sentence, 2, refinedLattice, fullLattice);
    boolean nerAddedVertices = false;
    for (List<Vertex> path : nBestPaths) {
        if (HanLP.Config.DEBUG) {
            System.out.println("粗分结果" + convert(path, false));
        }
        // Named-entity recognition: let every candidate path contribute
        // vertices to the shared refined lattice.
        if (config.ner) {
            refinedLattice.addAll(path);
            int sizeBeforeNer = refinedLattice.size();
            if (config.nameRecognize) {
                PersonRecognition.Recognition(path, refinedLattice, fullLattice);
            }
            if (config.translatedNameRecognize) {
                TranslatedPersonRecognition.Recognition(path, refinedLattice, fullLattice);
            }
            if (config.japaneseNameRecognize) {
                JapanesePersonRecognition.Recognition(path, refinedLattice, fullLattice);
            }
            if (config.placeRecognize) {
                PlaceRecognition.Recognition(path, refinedLattice, fullLattice);
            }
            if (config.organizationRecognize) {
                // Cascaded HMM: re-decode so this level's output feeds the
                // next level as input.
                path = Dijkstra.compute(GenerateBiGraph(refinedLattice));
                refinedLattice.addAll(path);
                OrganizationRecognition.Recognition(path, refinedLattice, fullLattice);
            }
            if (!nerAddedVertices && sizeBeforeNer != refinedLattice.size()) {
                nerAddedVertices = true;
            }
        }
    }
    // Default to the single best coarse path; re-decode only if NER grew the lattice.
    List<Vertex> result = nBestPaths.get(0);
    if (nerAddedVertices) {
        Graph graph = GenerateBiGraph(refinedLattice);
        result = Dijkstra.compute(graph);
        if (HanLP.Config.DEBUG) {
            System.out.printf("细分词网:\n%s\n", refinedLattice);
            System.out.printf("细分词图:%s\n", graph.printByTo());
        }
    }
    // Fuse number + quantifier sequences into single terms.
    if (config.numberQuantifierRecognize) {
        mergeNumberQuantifier(result, fullLattice, config);
    }
    // Index mode: return the exhaustive (overlapping) segmentation instead.
    if (config.indexMode) {
        return decorateResultForIndexMode(result, fullLattice);
    }
    // Optional part-of-speech tagging.
    if (config.speechTagging) {
        speechTagging(result);
    }
    if (config.useCustomDictionary) {
        // NOTE(review): the indexMode branch below looks unreachable — index
        // mode already returned above. Kept as-is; confirm intended behavior.
        if (config.indexMode)
            combineByCustomDictionary(result, fullLattice);
        else
            combineByCustomDictionary(result);
    }
    return convert(result, config.offset);
}