
Example 26 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class NShortSegment method BiSegment.

/**
     * Bigram language model segmentation
     * @param sSentence      the sentence to segment
     * @param nKind          how many results are wanted
     * @param wordNetOptimum the optimized word net that merged words are written into
     * @param wordNetAll     the full word net
     * @return a list of coarse segmentation results
     */
public List<List<Vertex>> BiSegment(char[] sSentence, int nKind, WordNet wordNetOptimum, WordNet wordNetAll) {
    List<List<Vertex>> coarseResult = new LinkedList<List<Vertex>>();
    //////////////// generate the word net ////////////////////
    GenerateWordNet(wordNetAll);
    //        logger.trace("word net size: " + wordNetAll.size());
    //        logger.trace("word net:\n" + wordNetAll);
    /////////////// generate the word graph ////////////////////
    Graph graph = GenerateBiGraph(wordNetAll);
    //        logger.trace(graph.toString());
    if (HanLP.Config.DEBUG) {
        // print the word graph ("打印词图") in debug mode
        System.out.printf("打印词图:%s\n", graph.printByTo());
    }
    /////////////// N-shortest paths ////////////////////
    NShortPath nShortPath = new NShortPath(graph, nKind);
    List<int[]> spResult = nShortPath.getNPaths(nKind * 2);
    if (spResult.size() == 0) {
        // solving failed: the word net above probably contains negative cycles or isolated nodes
        throw new RuntimeException(nKind + "-最短路径求解失败,请检查上面的词网是否存在负圈或悬孤节点");
    }
    ////////////// date/number merging strategy
    for (int[] path : spResult) {
        List<Vertex> vertexes = graph.parsePath(path);
        GenerateWord(vertexes, wordNetOptimum);
        coarseResult.add(vertexes);
    }
    return coarseResult;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) Graph(com.hankcs.hanlp.seg.common.Graph)
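
BiSegment is the internal engine of the N-shortest-path segmenter; callers normally go through the public Segment API instead. A minimal usage sketch, assuming the HanLP 1.x layout (com.hankcs.hanlp.seg.NShort.NShortSegment, Segment.seg, and the optional enable* recognizer switches); the demo class name and input sentence are purely illustrative:

import com.hankcs.hanlp.seg.NShort.NShortSegment;
import com.hankcs.hanlp.seg.Segment;
import com.hankcs.hanlp.seg.common.Term;

import java.util.List;

public class NShortSegmentDemo {
    public static void main(String[] args) {
        // NShortSegment calls the BiSegment shown above under the hood
        Segment segment = new NShortSegment()
                .enablePlaceRecognize(true)
                .enableOrganizationRecognize(true);
        List<Term> termList = segment.seg("今天上海天气不错,适合户外运动");
        System.out.println(termList);
    }
}

The two recognizer switches correspond to the PlaceRecognition and OrganizationRecognition passes shown in the later examples.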

Example 27 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class AtomNode method convert.

public static Vertex convert(String word, int type) {
    // keep the original surface form; "word" may be replaced below by a dictionary placeholder
    String name = word;
    Nature nature = Nature.n;
    int dValue = 1;
    switch(type) {
        case Predefine.CT_CHINESE:
            break;
        case Predefine.CT_INDEX:
        case Predefine.CT_NUM:
            // numeric atoms are normalized to the "未##数" (unrecognized number) placeholder
            nature = Nature.m;
            word = "未##数";
            break;
        case Predefine.CT_DELIMITER:
            nature = Nature.w;
            break;
        case Predefine.CT_LETTER:
            // letter strings are normalized to the "未##串" (unrecognized string) placeholder
            nature = Nature.nx;
            word = "未##串";
            break;
        case Predefine.CT_SINGLE:
            // 12021-2129-3121
            //                if (Pattern.compile("^(-?\\d+)(\\.\\d+)?$").matcher(word).matches())  // matches a floating-point number
            //                {
            //                    nature = Nature.m;
            //                    word = "未##数";
            //                } else
            //                {
            nature = Nature.nx;
            word = "未##串";
            //                }
            break;
        default:
            break;
    }
    return new Vertex(word, name, new CoreDictionary.Attribute(nature, dValue));
}
Also used : Nature(com.hankcs.hanlp.corpus.tag.Nature) Vertex(com.hankcs.hanlp.seg.common.Vertex) CoreDictionary(com.hankcs.hanlp.dictionary.CoreDictionary)
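
A short fragment, not taken from the project, to illustrate what the switch above produces for a numeric atom; it assumes AtomNode and Predefine are imported (paths omitted) and that Vertex exposes the public word/realWord fields used elsewhere in these examples:

// sketch only: the mapping follows the CT_NUM branch of convert shown above
Vertex v = AtomNode.convert("2024", Predefine.CT_NUM);
System.out.println(v.word);      // "未##数" - dictionary placeholder picked for numeric atoms
System.out.println(v.realWord);  // "2024"  - the surface form is kept as realWord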

Example 28 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class TranslatedPersonRecognition method Recognition.

/**
     * Run the recognition pass
     * @param segResult      the coarse segmentation result
     * @param wordNetOptimum the word graph corresponding to the coarse result
     * @param wordNetAll     the full word graph
     */
public static void Recognition(List<Vertex> segResult, WordNet wordNetOptimum, WordNet wordNetAll) {
    StringBuilder sbName = new StringBuilder();
    int appendTimes = 0;
    ListIterator<Vertex> listIterator = segResult.listIterator();
    // skip the begin-of-sentence vertex
    listIterator.next();
    int line = 1;
    int activeLine = 1;
    while (listIterator.hasNext()) {
        Vertex vertex = listIterator.next();
        if (appendTimes > 0) {
            if (vertex.guessNature() == Nature.nrf || TranslatedPersonDictionary.containsKey(vertex.realWord)) {
                sbName.append(vertex.realWord);
                ++appendTimes;
            } else {
                // recognition of the current name ends here
                if (appendTimes > 1) {
                    if (HanLP.Config.DEBUG) {
                        System.out.println("音译人名识别出:" + sbName.toString());
                    }
                    wordNetOptimum.insert(activeLine, new Vertex(Predefine.TAG_PEOPLE, sbName.toString(), new CoreDictionary.Attribute(Nature.nrf), WORD_ID), wordNetAll);
                }
                sbName.setLength(0);
                appendTimes = 0;
            }
        } else {
            // nrf and nsf trigger recognition
            if (vertex.guessNature() == Nature.nrf || vertex.getNature() == Nature.nsf) //                        || TranslatedPersonDictionary.containsKey(vertex.realWord)
            {
                sbName.append(vertex.realWord);
                ++appendTimes;
                activeLine = line;
            }
        }
        line += vertex.realWord.length();
    }
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex)
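
This recognizer is rarely called directly; it runs as part of segmentation when the corresponding switch is set. A minimal sketch, assuming HanLP 1.x's HanLP.newSegment() and enableTranslatedNameRecognize; the input sentence is illustrative and the output depends on the loaded dictionaries:

// sketch: recognition of transliterated person names is enabled on a Segment instance
Segment segment = HanLP.newSegment().enableTranslatedNameRecognize(true);
System.out.println(segment.seg("比尔·盖茨和史蒂夫·乔布斯见了一面"));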

Example 29 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class PlaceRecognition method insert.

private static void insert(ListIterator<Vertex> listIterator, List<EnumItem<NS>> tagList, WordNet wordNetAll, int line, NS ns) {
    Vertex vertex = wordNetAll.getFirst(line);
    // every line of the full word net is expected to contain at least one vertex
    assert vertex != null : "全词网居然有空白行!";
    listIterator.add(vertex);
    // tag the inserted vertex with the given NS role and a large pseudo-frequency
    tagList.add(new EnumItem<NS>(ns, 1000));
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) NS(com.hankcs.hanlp.corpus.tag.NS)
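
The helper above is internal to the place-name pass, which is reached from the outside through the place-recognition switch. A brief sketch under the same HanLP 1.x assumption (enablePlaceRecognize on Segment); the sentence is illustrative:

// sketch: the place-name pass (and its insert helper above) runs when the switch is on
Segment segment = HanLP.newSegment().enablePlaceRecognize(true);
System.out.println(segment.seg("蓝翔给宁夏固原市彭阳县红河镇黑牛沟村捐赠了挖掘机"));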

Example 30 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class OrganizationRecognition method Recognition.

public static boolean Recognition(List<Vertex> pWordSegResult, WordNet wordNetOptimum, WordNet wordNetAll) {
    List<EnumItem<NT>> roleTagList = roleTag(pWordSegResult, wordNetAll);
    if (HanLP.Config.DEBUG) {
        StringBuilder sbLog = new StringBuilder();
        Iterator<Vertex> iterator = pWordSegResult.iterator();
        for (EnumItem<NT> NTEnumItem : roleTagList) {
            sbLog.append('[');
            sbLog.append(iterator.next().realWord);
            sbLog.append(' ');
            sbLog.append(NTEnumItem);
            sbLog.append(']');
        }
        System.out.printf("机构名角色观察:%s\n", sbLog.toString());
    }
    // decode the most likely NT role sequence with the Viterbi algorithm
    List<NT> NTList = viterbiExCompute(roleTagList);
    if (HanLP.Config.DEBUG) {
        StringBuilder sbLog = new StringBuilder();
        Iterator<Vertex> iterator = pWordSegResult.iterator();
        sbLog.append('[');
        for (NT NT : NTList) {
            sbLog.append(iterator.next().realWord);
            sbLog.append('/');
            sbLog.append(NT);
            sbLog.append(" ,");
        }
        if (sbLog.length() > 1)
            sbLog.delete(sbLog.length() - 2, sbLog.length());
        sbLog.append(']');
        System.out.printf("机构名角色标注:%s\n", sbLog.toString());
    }
    // match the role sequence against organization patterns and write hits into wordNetOptimum
    OrganizationDictionary.parsePattern(NTList, pWordSegResult, wordNetOptimum, wordNetAll);
    return true;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) NT(com.hankcs.hanlp.corpus.tag.NT) EnumItem(com.hankcs.hanlp.corpus.dictionary.item.EnumItem)
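
Like the other recognizers, this pass is activated through the Segment configuration rather than invoked directly. A minimal sketch, assuming HanLP 1.x's enableOrganizationRecognize; pairing it with NShortSegment (as in Example 26) is a common choice because the richer word net helps entity recognition:

// sketch: organization recognition on top of the N-shortest segmenter
Segment segment = new NShortSegment().enableOrganizationRecognize(true);
System.out.println(segment.seg("我在上海林原科技有限公司兼职工作"));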

Aggregations

Vertex (com.hankcs.hanlp.seg.common.Vertex) 33
EnumItem (com.hankcs.hanlp.corpus.dictionary.item.EnumItem) 6
LinkedList (java.util.LinkedList) 6
CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary) 5
Term (com.hankcs.hanlp.seg.common.Term) 5
AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie) 4
NS (com.hankcs.hanlp.corpus.tag.NS) 4
Nature (com.hankcs.hanlp.corpus.tag.Nature) 4
NR (com.hankcs.hanlp.corpus.tag.NR) 3
NT (com.hankcs.hanlp.corpus.tag.NT) 3
Graph (com.hankcs.hanlp.seg.common.Graph) 2
WordNet (com.hankcs.hanlp.seg.common.WordNet) 2
DoubleArrayTrie (com.hankcs.hanlp.collection.trie.DoubleArrayTrie) 1
CharTable (com.hankcs.hanlp.dictionary.other.CharTable) 1
Table (com.hankcs.hanlp.model.crf.Table) 1
State (com.hankcs.hanlp.seg.Dijkstra.Path.State) 1
EdgeFrom (com.hankcs.hanlp.seg.common.EdgeFrom) 1
List (java.util.List) 1
PriorityQueue (java.util.PriorityQueue) 1