
Example 1 with WordNet

Use of com.hankcs.hanlp.seg.common.WordNet in the HanLP project by hankcs.

The segSentence method of the ViterbiSegment class:

@Override
protected List<Term> segSentence(char[] sentence) {
    //        long start = System.currentTimeMillis();
    WordNet wordNetAll = new WordNet(sentence);
    // Generate the word lattice (word net)
    GenerateWordNet(wordNetAll);
    //        System.out.println("Graph construction: " + (System.currentTimeMillis() - start));
    if (HanLP.Config.DEBUG) {
        // Print the coarse word lattice ("粗分词网")
        System.out.printf("粗分词网:\n%s\n", wordNetAll);
    }
    //        start = System.currentTimeMillis();
    List<Vertex> vertexList = viterbi(wordNetAll);
    if (config.useCustomDictionary) {
        if (config.indexMode)
            combineByCustomDictionary(vertexList, wordNetAll);
        else
            combineByCustomDictionary(vertexList);
    }
    if (HanLP.Config.DEBUG) {
        // Print the coarse segmentation result ("粗分结果")
        System.out.println("粗分结果" + convert(vertexList, false));
    }
    // Number and quantifier recognition
    if (config.numberQuantifierRecognize) {
        mergeNumberQuantifier(vertexList, wordNetAll, config);
    }
    // Named entity recognition
    if (config.ner) {
        WordNet wordNetOptimum = new WordNet(sentence, vertexList);
        int preSize = wordNetOptimum.size();
        if (config.nameRecognize) {
            PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.translatedNameRecognize) {
            TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.japaneseNameRecognize) {
            JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.placeRecognize) {
            PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (config.organizationRecognize) {
            // Cascaded HMM: the output of this stage becomes the input of the next-level HMM
            vertexList = viterbi(wordNetOptimum);
            wordNetOptimum.clear();
            wordNetOptimum.addAll(vertexList);
            preSize = wordNetOptimum.size();
            OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
        }
        if (wordNetOptimum.size() != preSize) {
            vertexList = viterbi(wordNetOptimum);
            if (HanLP.Config.DEBUG) {
                // Print the refined word lattice ("细分词网")
                System.out.printf("细分词网:\n%s\n", wordNetOptimum);
            }
        }
    }
    // In index mode, return the full (all-possible-words) segmentation
    if (config.indexMode) {
        return decorateResultForIndexMode(vertexList, wordNetAll);
    }
    // Apply part-of-speech tagging if enabled
    if (config.speechTagging) {
        speechTagging(vertexList);
    }
    return convert(vertexList, config.offset);
}
Also used: Vertex (com.hankcs.hanlp.seg.common.Vertex), WordNet (com.hankcs.hanlp.seg.common.WordNet)
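
For context, a minimal usage sketch of how this segmenter is typically configured and invoked through HanLP's public Segment API. The enable* setters toggle the same config flags that segSentence reads above (useCustomDictionary, nameRecognize, placeRecognize, organizationRecognize, speechTagging); the sample sentence and the particular flag choices are illustrative assumptions, not taken from the example.

import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.Viterbi.ViterbiSegment;
import com.hankcs.hanlp.seg.common.Term;
import java.util.List;

public class ViterbiSegmentDemo {
    public static void main(String[] args) {
        // Viterbi-based segmenter; each enable* call sets one of the config
        // flags consulted by segSentence (custom dictionary, NER passes, POS tagging).
        Segment segment = new ViterbiSegment()
                .enableCustomDictionary(true)       // config.useCustomDictionary
                .enableNameRecognize(true)          // config.nameRecognize
                .enablePlaceRecognize(true)         // config.placeRecognize
                .enableOrganizationRecognize(true)  // config.organizationRecognize
                .enablePartOfSpeechTagging(true);   // config.speechTagging
        // seg(String) splits the text and internally calls segSentence(char[]).
        List<Term> terms = segment.seg("商品和服务");
        for (Term term : terms) {
            System.out.println(term.word + "/" + term.nature);
        }
    }
}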

Example 2 with WordNet

Use of com.hankcs.hanlp.seg.common.WordNet in the HanLP project by hankcs.

The segSentence method of the NShortSegment class:

@Override
public List<Term> segSentence(char[] sentence) {
    WordNet wordNetOptimum = new WordNet(sentence);
    WordNet wordNetAll = new WordNet(sentence);
    //        char[] charArray = text.toCharArray();
    // Coarse segmentation (keep the 2 best paths)
    List<List<Vertex>> coarseResult = BiSegment(sentence, 2, wordNetOptimum, wordNetAll);
    boolean NERexists = false;
    for (List<Vertex> vertexList : coarseResult) {
        if (HanLP.Config.DEBUG) {
            // Print the coarse segmentation result ("粗分结果")
            System.out.println("粗分结果" + convert(vertexList, false));
        }
        // Named entity recognition
        if (config.ner) {
            wordNetOptimum.addAll(vertexList);
            int preSize = wordNetOptimum.size();
            if (config.nameRecognize) {
                PersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.translatedNameRecognize) {
                TranslatedPersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.japaneseNameRecognize) {
                JapanesePersonRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.placeRecognize) {
                PlaceRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (config.organizationRecognize) {
                // Cascaded HMM: the output of this stage becomes the input of the next-level HMM
                vertexList = Dijkstra.compute(GenerateBiGraph(wordNetOptimum));
                wordNetOptimum.addAll(vertexList);
                OrganizationRecognition.Recognition(vertexList, wordNetOptimum, wordNetAll);
            }
            if (!NERexists && preSize != wordNetOptimum.size()) {
                NERexists = true;
            }
        }
    }
    List<Vertex> vertexList = coarseResult.get(0);
    if (NERexists) {
        Graph graph = GenerateBiGraph(wordNetOptimum);
        vertexList = Dijkstra.compute(graph);
        if (HanLP.Config.DEBUG) {
            // Print the refined word lattice and word graph ("细分词网" / "细分词图")
            System.out.printf("细分词网:\n%s\n", wordNetOptimum);
            System.out.printf("细分词图:%s\n", graph.printByTo());
        }
    }
    // Number and quantifier recognition
    if (config.numberQuantifierRecognize) {
        mergeNumberQuantifier(vertexList, wordNetAll, config);
    }
    // In index mode, return the full (all-possible-words) segmentation
    if (config.indexMode) {
        return decorateResultForIndexMode(vertexList, wordNetAll);
    }
    // Apply part-of-speech tagging if enabled
    if (config.speechTagging) {
        speechTagging(vertexList);
    }
    if (config.useCustomDictionary) {
        if (config.indexMode)
            combineByCustomDictionary(vertexList, wordNetAll);
        else
            combineByCustomDictionary(vertexList);
    }
    return convert(vertexList, config.offset);
}
Also used: Vertex (com.hankcs.hanlp.seg.common.Vertex), Graph (com.hankcs.hanlp.seg.common.Graph), WordNet (com.hankcs.hanlp.seg.common.WordNet)
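
A similar hedged sketch for driving the N-shortest-path segmenter through the public API. NShortSegment shares the Segment base class, so the same enable* switches set the config flags checked above; the sample sentence and flag choices are assumptions for illustration only.

import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;
import java.util.List;

public class NShortSegmentDemo {
    public static void main(String[] args) {
        // N-shortest-path segmenter: slower than ViterbiSegment, but it keeps
        // several coarse candidate paths, which gives the NER passes more material.
        Segment nShortSegment = new NShortSegment()
                .enableCustomDictionary(true)        // config.useCustomDictionary
                .enableNameRecognize(true)           // config.nameRecognize
                .enableTranslatedNameRecognize(true) // config.translatedNameRecognize
                .enablePlaceRecognize(true)          // config.placeRecognize
                .enableOrganizationRecognize(true);  // config.organizationRecognize
        List<Term> terms = nShortSegment.seg("刘喜杰石国祥会见吴亚琴先进事迹报告团成员");
        System.out.println(terms);
    }
}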

Aggregations

Vertex (com.hankcs.hanlp.seg.common.Vertex): 2 examples
WordNet (com.hankcs.hanlp.seg.common.WordNet): 2 examples
Graph (com.hankcs.hanlp.seg.common.Graph): 1 example