Search in sources :

Example 11 with Nature

use of com.hankcs.hanlp.corpus.tag.Nature in project HanLP by hankcs.

The method convert of the class AtomNode.

/**
 * Builds a {@link Vertex} for an atom, picking its nature and display word from the character type.
 *
 * <ul>
 *   <li>{@code CT_INDEX} / {@code CT_NUM}: nature {@code m}, word replaced by the number placeholder.</li>
 *   <li>{@code CT_DELIMITER}: nature {@code w}, word kept as is.</li>
 *   <li>{@code CT_LETTER} / {@code CT_SINGLE}: nature {@code nx}, word replaced by the string placeholder
 *       (no numeric check is performed on {@code CT_SINGLE}).</li>
 *   <li>{@code CT_CHINESE} and any other type: nature {@code n}, word kept as is.</li>
 * </ul>
 *
 * @param word the original text of the atom
 * @param type one of the {@code Predefine.CT_*} character-type constants
 * @return a vertex whose first argument is the (possibly replaced) word and whose second is the original text
 */
public static Vertex convert(String word, int type) {
    final String originalText = word;
    // Constant passed as the second Attribute argument; presumably a frequency — confirm against CoreDictionary.Attribute.
    final int attributeValue = 1;

    final Nature chosenNature;
    final String label;
    switch (type) {
        case Predefine.CT_INDEX:
        case Predefine.CT_NUM:
            chosenNature = Nature.m;
            label = "未##数";
            break;
        case Predefine.CT_DELIMITER:
            chosenNature = Nature.w;
            label = originalText;
            break;
        case Predefine.CT_LETTER:
        case Predefine.CT_SINGLE:
            chosenNature = Nature.nx;
            label = "未##串";
            break;
        case Predefine.CT_CHINESE:
        default:
            chosenNature = Nature.n;
            label = originalText;
            break;
    }

    CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(chosenNature, attributeValue);
    return new Vertex(label, originalText, attribute);
}
Also used : Nature(com.hankcs.hanlp.corpus.tag.Nature) Vertex(com.hankcs.hanlp.seg.common.Vertex) CoreDictionary(com.hankcs.hanlp.dictionary.CoreDictionary)

Example 12 with Nature

use of com.hankcs.hanlp.corpus.tag.Nature in project HanLP by hankcs.

The method segSentence of the class DoubleArrayTrieSegment.

/**
 * Segments a sentence by longest match against the core dictionary trie and, optionally,
 * the custom dictionary.
 *
 * <p>For every start offset the length of the longest word found there is recorded
 * (default 1, i.e. a single character). Terms are then emitted left to right, each one
 * jumping ahead by the recorded length. When {@code config.speechTagging} is on, each term
 * also carries a nature: the first nature of the matched dictionary entry, the nature of an
 * atom produced by {@code quickAtomSegment}, or {@code Nature.nz} when none was recorded.
 *
 * @param sentence the characters to segment
 * @return the terms in order of appearance, each with its {@code offset} set to its start index
 */
@Override
protected List<Term> segSentence(char[] sentence) {
    char[] charArray = sentence;
    // wordNet[i] = length of the longest word known to start at offset i; starts as 1 everywhere.
    final int[] wordNet = new int[charArray.length];
    Arrays.fill(wordNet, 1);
    // Only allocated when tagging is requested; natureArray[i] stays null until a word/atom starting at i is tagged.
    final Nature[] natureArray = config.speechTagging ? new Nature[charArray.length] : null;
    DoubleArrayTrie<CoreDictionary.Attribute>.Searcher<CoreDictionary.Attribute> searcher = CoreDictionary.trie.getSearcher(sentence, 0);
    // Pass 1: core dictionary hits. Keep a hit only if it is strictly longer than what is recorded at its start.
    while (searcher.next()) {
        int length = searcher.length;
        if (length > wordNet[searcher.begin]) {
            wordNet[searcher.begin] = length;
            if (config.speechTagging) {
                // Use the entry's first listed nature.
                natureArray[searcher.begin] = searcher.value.nature[0];
            }
        }
    }
    // Pass 2: custom dictionary hits, applying the same "strictly longer wins" rule.
    if (config.useCustomDictionary) {
        CustomDictionary.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {

            @Override
            public void hit(int begin, int end, CoreDictionary.Attribute value) {
                // The hit length is computed as end - begin.
                int length = end - begin;
                if (length > wordNet[begin]) {
                    wordNet[begin] = length;
                    if (config.speechTagging) {
                        natureArray[begin] = value.nature[0];
                    }
                }
            }
        });
    }
    LinkedList<Term> termList = new LinkedList<Term>();
    // Pass 3 (tagging only): fill in natures for runs of offsets that no dictionary word starts at.
    if (config.speechTagging) {
        for (int i = 0; i < natureArray.length; ) {
            if (natureArray[i] == null) {
                // Find the end (exclusive) of the run of untagged offsets beginning at i.
                int j = i + 1;
                for (; j < natureArray.length; ++j) {
                    if (natureArray[j] != null)
                        break;
                }
                // Split the run [i, j) into atoms and record each atom's length and nature at its start offset.
                List<AtomNode> atomNodeList = quickAtomSegment(charArray, i, j);
                for (AtomNode atomNode : atomNodeList) {
                    // NOTE(review): i only advances when the atom is at least as long as the recorded length;
                    // otherwise later atoms are compared against the same offset — confirm this is intended.
                    if (atomNode.sWord.length() >= wordNet[i]) {
                        wordNet[i] = atomNode.sWord.length();
                        natureArray[i] = atomNode.getNature();
                        i += wordNet[i];
                    }
                }
                // Resume scanning at the end of the run regardless of where the atom loop left i.
                i = j;
            } else {
                // NOTE(review): advances by one, so offsets inside a matched word (which have no nature of
                // their own) are also visited by the untagged-run branch above; the emission loop below
                // jumps over them by word length.
                ++i;
            }
        }
    }
    // Emit terms left to right, jumping by the recorded word length at each offset.
    for (int i = 0; i < wordNet.length; ) {
        // With tagging on, fall back to Nature.nz when no nature was recorded; with tagging off, the nature is null.
        Term term = new Term(new String(charArray, i, wordNet[i]), config.speechTagging ? (natureArray[i] == null ? Nature.nz : natureArray[i]) : null);
        term.offset = i;
        termList.add(term);
        i += wordNet[i];
    }
    return termList;
}
Also used : Nature(com.hankcs.hanlp.corpus.tag.Nature) Term(com.hankcs.hanlp.seg.common.Term) AhoCorasickDoubleArrayTrie(com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie) DoubleArrayTrie(com.hankcs.hanlp.collection.trie.DoubleArrayTrie) LinkedList(java.util.LinkedList) AhoCorasickDoubleArrayTrie(com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie) CoreDictionary(com.hankcs.hanlp.dictionary.CoreDictionary) AtomNode(com.hankcs.hanlp.seg.NShort.Path.AtomNode)

Example 13 with Nature

use of com.hankcs.hanlp.corpus.tag.Nature in project HanLP by hankcs.

The method loadDat of the class CoreDictionary.

/**
 * Loads the double-array trie and its attributes from a binary cache on disk.
 *
 * <p>The data is read from {@code path + Predefine.BIN_EXT}. The stream starts with the number
 * of attributes; each attribute is stored as its total frequency, its nature count, and then
 * one (nature index, frequency) pair per nature. The remaining bytes are handed to the trie.
 *
 * @param path dictionary path; the binary extension is appended to it
 * @return {@code true} only if the file could be opened, the trie loaded successfully and no
 *         unread bytes remain; {@code false} otherwise, including when any exception occurs
 */
static boolean loadDat(String path) {
    try {
        final ByteArray bytes = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (bytes == null) {
            return false;
        }

        final int attributeCount = bytes.nextInt();
        final CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[attributeCount];
        // Natures are stored as indices into Nature.values().
        final Nature[] natureByIndex = Nature.values();

        for (int index = 0; index < attributeCount; index++) {
            // First comes the total frequency, then the number of natures.
            final int totalFrequency = bytes.nextInt();
            final int natureCount = bytes.nextInt();

            final CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
            attributes[index] = attribute;
            attribute.totalFrequency = totalFrequency;

            for (int slot = 0; slot < natureCount; slot++) {
                attribute.nature[slot] = natureByIndex[bytes.nextInt()];
                attribute.frequency[slot] = bytes.nextInt();
            }
        }

        // Succeeds only when the trie loads and the stream has been consumed completely.
        return trie.load(bytes, attributes) && !bytes.hasMore();
    } catch (Exception e) {
        logger.warning("读取失败,问题发生在" + e);
        return false;
    }
}
Also used : Nature(com.hankcs.hanlp.corpus.tag.Nature) ByteArray(com.hankcs.hanlp.corpus.io.ByteArray)

Example 14 with Nature

use of com.hankcs.hanlp.corpus.tag.Nature in project HanLP by hankcs.

The method addNature of the class CustomNatureUtility.

/**
 * Adds a custom nature (part-of-speech tag), or returns the one already registered under that name.
 *
 * @param name the name of the nature
 * @return the nature registered under {@code name}
 */
public static Nature addNature(String name) {
    final Nature existing = extraValueMap.get(name);
    if (existing == null) {
        final Nature created = enumBuster.make(name);
        enumBuster.addByValue(created);
        extraValueMap.put(name, created);
        // The tuples in the part-of-speech tagging HMM model must be adjusted for the new nature.
        CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary.extendSize();
        return created;
    }
    return existing;
}
Also used : Nature(com.hankcs.hanlp.corpus.tag.Nature)

Aggregations

Nature (com.hankcs.hanlp.corpus.tag.Nature)14 Vertex (com.hankcs.hanlp.seg.common.Vertex)4 CoreDictionary (com.hankcs.hanlp.dictionary.CoreDictionary)3 AtomNode (com.hankcs.hanlp.seg.NShort.Path.AtomNode)3 Term (com.hankcs.hanlp.seg.common.Term)3 AhoCorasickDoubleArrayTrie (com.hankcs.hanlp.collection.AhoCorasick.AhoCorasickDoubleArrayTrie)2 ByteArray (com.hankcs.hanlp.corpus.io.ByteArray)2 LinkedList (java.util.LinkedList)2 DoubleArrayTrie (com.hankcs.hanlp.collection.trie.DoubleArrayTrie)1 EnumItem (com.hankcs.hanlp.corpus.dictionary.item.EnumItem)1 NT (com.hankcs.hanlp.corpus.tag.NT)1