Search in sources :

Example 6 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class PlaceRecognition method roleTag.

/**
 * Produces one place-name role item for every vertex of the given path.
 *
 * <p>A vertex whose nature is {@code Nature.ns} and whose attribute total frequency is at most
 * 1000 is tagged directly from its surface length. Every other vertex is looked up in
 * {@code PlaceDictionary.dictionary} by {@code vertex.word}; a miss yields an {@code NS.Z} item
 * carrying the total frequency of {@code NS.Z} from the transform matrix dictionary.
 *
 * @param vertexList the path to tag
 * @param wordNetAll not read by this method
 * @return the role items, in the same order as {@code vertexList}
 */
public static List<EnumItem<NS>> roleTag(List<Vertex> vertexList, WordNet wordNetAll) {
    List<EnumItem<NS>> tagList = new LinkedList<EnumItem<NS>>();
    for (Vertex vertex : vertexList) {
        EnumItem<NS> item;
        boolean rarePlaceName = vertex.getNature() == Nature.ns
                && vertex.getAttribute().totalFrequency <= 1000;
        if (rarePlaceName) {
            if (vertex.realWord.length() >= 3) {
                // Longer place names can only take a further suffix.
                item = new EnumItem<NS>(NS.G);
            } else {
                // Two-character place names are assumed to accept one more suffix or prefix.
                item = new EnumItem<NS>(NS.H, NS.G);
            }
        } else {
            // The equivalent word is used for the lookup here, which is more precise.
            item = PlaceDictionary.dictionary.get(vertex.word);
            if (item == null) {
                int fallbackFrequency = PlaceDictionary.transformMatrixDictionary.getTotalFrequency(NS.Z);
                item = new EnumItem<NS>(NS.Z, fallbackFrequency);
            }
        }
        tagList.add(item);
    }
    return tagList;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) NS(com.hankcs.hanlp.corpus.tag.NS) EnumItem(com.hankcs.hanlp.corpus.dictionary.item.EnumItem) LinkedList(java.util.LinkedList)

Example 7 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class OrganizationRecognition method roleTag.

/**
 * Produces one organization-name role item for every vertex of the given path.
 *
 * <p>The guessed nature of a vertex decides the tag first: {@code nrf} with an attribute total
 * frequency of at most 1000 gives {@code NT.F}; {@code ni}, {@code nic}, {@code nis} and
 * {@code nit} give {@code NT.K} plus an added {@code NT.D} label; {@code m} gives {@code NT.M}.
 * Each of these is created with the value 1000. Any vertex not tagged this way is looked up in
 * {@code OrganizationDictionary.dictionary} by {@code vertex.word}, and a miss yields an
 * {@code NT.Z} item.
 *
 * @param vertexList the path to tag
 * @param wordNetAll not read by this method
 * @return the role items, in the same order as {@code vertexList}
 */
public static List<EnumItem<NT>> roleTag(List<Vertex> vertexList, WordNet wordNetAll) {
    List<EnumItem<NT>> tagList = new LinkedList<EnumItem<NT>>();
    for (Vertex vertex : vertexList) {
        // Stays null when the nature-based rules do not apply, which triggers the dictionary lookup.
        EnumItem<NT> item = null;
        switch (vertex.guessNature()) {
            case nrf:
                if (vertex.getAttribute().totalFrequency <= 1000) {
                    item = new EnumItem<NT>(NT.F, 1000);
                }
                break;
            case ni:
            case nic:
            case nis:
            case nit:
                item = new EnumItem<NT>(NT.K, 1000);
                item.addLabel(NT.D, 1000);
                break;
            case m:
                item = new EnumItem<NT>(NT.M, 1000);
                break;
            default:
                break;
        }
        if (item == null) {
            // The equivalent word is used for the lookup here, which is more precise.
            item = OrganizationDictionary.dictionary.get(vertex.word);
            if (item == null) {
                int fallbackFrequency = OrganizationDictionary.transformMatrixDictionary.getTotalFrequency(NT.Z);
                item = new EnumItem<NT>(NT.Z, fallbackFrequency);
            }
        }
        tagList.add(item);
    }
    return tagList;
}
Also used : Nature(com.hankcs.hanlp.corpus.tag.Nature) Vertex(com.hankcs.hanlp.seg.common.Vertex) NT(com.hankcs.hanlp.corpus.tag.NT) EnumItem(com.hankcs.hanlp.corpus.dictionary.item.EnumItem) LinkedList(java.util.LinkedList)

Example 8 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class CRFSegment method toTermList.

/**
 * Converts a path of vertices into the final list of terms.
 *
 * <p>Each vertex is passed through {@code convert}. When offsets are enabled, every term's
 * {@code offset} is set to the sum of {@code length()} of the terms before it, starting at 0.
 *
 * @param vertexList    the path to convert; asserted to be non-null
 * @param offsetEnabled whether to compute the offset of each term
 * @return the converted terms, in path order
 */
protected static List<Term> toTermList(List<Vertex> vertexList, boolean offsetEnabled) {
    assert vertexList != null;
    List<Term> resultList = new ArrayList<Term>(vertexList.size());
    int runningOffset = 0;
    for (Vertex vertex : vertexList) {
        Term term = convert(vertex);
        if (offsetEnabled) {
            term.offset = runningOffset;
            runningOffset += term.length();
        }
        resultList.add(term);
    }
    return resultList;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) Term(com.hankcs.hanlp.seg.common.Term)

Example 9 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class PersonRecognition method roleObserve.

/**
 * Role observation: collects the person-name role item of every word, taking it from
 * {@code PersonDictionary.dictionary} when present and otherwise deriving it by rule from the
 * vertex's guessed nature.
 *
 * @param wordSegResult the coarse segmentation result
 * @return the role items, in the same order as {@code wordSegResult}
 */
public static List<EnumItem<NR>> roleObserve(List<Vertex> wordSegResult) {
    List<EnumItem<NR>> tagList = new LinkedList<EnumItem<NR>>();
    for (Vertex vertex : wordSegResult) {
        EnumItem<NR> observed = PersonDictionary.dictionary.get(vertex.realWord);
        if (observed != null) {
            tagList.add(observed);
            continue;
        }
        // Not in the dictionary: fall back to rules keyed on the guessed nature.
        switch (vertex.guessNature()) {
            case nnt:
                // Surname + job title.
                observed = new EnumItem<NR>(NR.G, NR.K);
                break;
            case nr:
                // Some two-character names can actually form a longer three-character name.
                if (vertex.realWord.length() == 2 && vertex.getAttribute().totalFrequency <= 1000) {
                    observed = new EnumItem<NR>(NR.X, NR.G);
                    break;
                }
                observed = new EnumItem<NR>(NR.A, PersonDictionary.transformMatrixDictionary.getTotalFrequency(NR.A));
                break;
            default:
                observed = new EnumItem<NR>(NR.A, PersonDictionary.transformMatrixDictionary.getTotalFrequency(NR.A));
                break;
        }
        tagList.add(observed);
    }
    return tagList;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) NR(com.hankcs.hanlp.corpus.tag.NR) EnumItem(com.hankcs.hanlp.corpus.dictionary.item.EnumItem) LinkedList(java.util.LinkedList)

Example 10 with Vertex

use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.

the class PersonRecognition method Recognition.

/**
 * Runs person-name recognition over a segmentation path: observes roles with
 * {@code roleObserve}, decodes them with {@code viterbiComputeSimply}, and hands the result to
 * {@code PersonDictionary.parsePattern}.
 *
 * <p>When {@code HanLP.Config.DEBUG} is set, the observed roles and the decoded tags are printed
 * to standard output.
 *
 * @param pWordSegResult the segmentation path to analyse
 * @param wordNetOptimum passed through to {@code PersonDictionary.parsePattern}
 * @param wordNetAll     passed through to {@code PersonDictionary.parsePattern}
 * @return always {@code true}
 */
public static boolean Recognition(List<Vertex> pWordSegResult, WordNet wordNetOptimum, WordNet wordNetAll) {
    List<EnumItem<NR>> roleTagList = roleObserve(pWordSegResult);
    if (HanLP.Config.DEBUG) {
        StringBuilder observeLog = new StringBuilder();
        Iterator<Vertex> vertexIt = pWordSegResult.iterator();
        for (EnumItem<NR> roleItem : roleTagList) {
            observeLog.append('[')
                    .append(vertexIt.next().realWord)
                    .append(' ')
                    .append(roleItem)
                    .append(']');
        }
        System.out.printf("人名角色观察:%s\n", observeLog.toString());
    }

    List<NR> nrList = viterbiComputeSimply(roleTagList);
    if (HanLP.Config.DEBUG) {
        StringBuilder tagLog = new StringBuilder();
        Iterator<Vertex> vertexIt = pWordSegResult.iterator();
        tagLog.append('[');
        for (NR tag : nrList) {
            tagLog.append(vertexIt.next().realWord).append('/').append(tag).append(" ,");
        }
        // Drop the trailing " ," separator left by the last entry.
        int logLength = tagLog.length();
        if (logLength > 1) {
            tagLog.delete(logLength - 2, logLength);
        }
        tagLog.append(']');
        System.out.printf("人名角色标注:%s\n", tagLog.toString());
    }

    PersonDictionary.parsePattern(nrList, pWordSegResult, wordNetOptimum, wordNetAll);
    return true;
}
Also used : Vertex(com.hankcs.hanlp.seg.common.Vertex) NR(com.hankcs.hanlp.corpus.tag.NR) EnumItem(com.hankcs.hanlp.corpus.dictionary.item.EnumItem)

Aggregations

Vertex (com.hankcs.hanlp.seg.common.Vertex): 33, EnumItem (com.hankcs.hanlp.corpus.dictionary.item.EnumItem): 6, LinkedList (java.util.LinkedList): 6, CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary): 5, Term (com.hankcs.hanlp.seg.common.Term): 5, AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie): 4, NS (com.hankcs.hanlp.corpus.tag.NS): 4, Nature (com.hankcs.hanlp.corpus.tag.Nature): 4, NR (com.hankcs.hanlp.corpus.tag.NR): 3, NT (com.hankcs.hanlp.corpus.tag.NT): 3, Graph (com.hankcs.hanlp.seg.common.Graph): 2, WordNet (com.hankcs.hanlp.seg.common.WordNet): 2, DoubleArrayTrie (com.hankcs.hanlp.collection.trie.DoubleArrayTrie): 1, CharTable (com.hankcs.hanlp.dictionary.other.CharTable): 1, Table (com.hankcs.hanlp.model.crf.Table): 1, State (com.hankcs.hanlp.seg.Dijkstra.Path.State): 1, EdgeFrom (com.hankcs.hanlp.seg.common.EdgeFrom): 1, List (java.util.List): 1, PriorityQueue (java.util.PriorityQueue): 1