Use of com.hankcs.hanlp.corpus.tag.Nature in the project HanLP by hankcs.
The class AtomNode, method convert.
/**
 * Builds a {@link Vertex} for a single atomic token, choosing its part of
 * speech and equivalence-class word from the atom's character type.
 *
 * @param word the raw token text
 * @param type one of the {@code Predefine.CT_*} character-type constants
 * @return a vertex whose real word is the original {@code word} and whose
 *         equivalent word/nature depend on {@code type}
 */
public static Vertex convert(String word, int type) {
    // Preserve the surface form; "word" itself may be replaced below by an
    // equivalence-class placeholder such as 未##数 / 未##串.
    String name = word;
    Nature nature = Nature.n;
    int dValue = 1;
    if (type == Predefine.CT_INDEX || type == Predefine.CT_NUM) {
        // Index characters and numbers collapse to the number placeholder.
        nature = Nature.m;
        word = "未##数";
    } else if (type == Predefine.CT_DELIMITER) {
        nature = Nature.w;
    } else if (type == Predefine.CT_LETTER || type == Predefine.CT_SINGLE) {
        // Letters and single symbols collapse to the string placeholder.
        nature = Nature.nx;
        word = "未##串";
    }
    // CT_CHINESE and any unrecognized type keep the defaults (Nature.n, raw word).
    return new Vertex(word, name, new CoreDictionary.Attribute(nature, dValue));
}
Use of com.hankcs.hanlp.corpus.tag.Nature in the project HanLP by hankcs.
The class DoubleArrayTrieSegment, method segSentence.
/**
 * Segments a sentence by longest-match lookup in the core dictionary's
 * double-array trie, optionally merging custom-dictionary hits and
 * assigning parts of speech.
 *
 * @param sentence the characters of the sentence to segment
 * @return the terms in order; each term carries its offset, and a nature
 *         only when {@code config.speechTagging} is enabled
 */
@Override
protected List<Term> segSentence(char[] sentence) {
    char[] charArray = sentence;
    // wordNet[i] = length of the word chosen to start at offset i;
    // initialized to 1 so every character is at least its own token.
    final int[] wordNet = new int[charArray.length];
    Arrays.fill(wordNet, 1);
    // natureArray[i] = part of speech of the word starting at i, or null when
    // no dictionary entry covered it; allocated only when tagging is on.
    final Nature[] natureArray = config.speechTagging ? new Nature[charArray.length] : null;
    // Scan the core dictionary trie over the whole sentence, keeping the
    // longest match at each start position.
    DoubleArrayTrie<CoreDictionary.Attribute>.Searcher<CoreDictionary.Attribute> searcher = CoreDictionary.trie.getSearcher(sentence, 0);
    while (searcher.next()) {
        int length = searcher.length;
        if (length > wordNet[searcher.begin]) {
            wordNet[searcher.begin] = length;
            if (config.speechTagging) {
                // Take the first listed nature of the matched entry.
                natureArray[searcher.begin] = searcher.value.nature[0];
            }
        }
    }
    // Custom-dictionary hits override core-dictionary matches when strictly longer.
    if (config.useCustomDictionary) {
        CustomDictionary.parseText(charArray, new AhoCorasickDoubleArrayTrie.IHit<CoreDictionary.Attribute>() {
            @Override
            public void hit(int begin, int end, CoreDictionary.Attribute value) {
                int length = end - begin;
                if (length > wordNet[begin]) {
                    wordNet[begin] = length;
                    if (config.speechTagging) {
                        natureArray[begin] = value.nature[0];
                    }
                }
            }
        });
    }
    LinkedList<Term> termList = new LinkedList<Term>();
    if (config.speechTagging) {
        // Fill tagging gaps: each maximal run [i, j) of untagged characters is
        // re-segmented with the quick atom segmenter.
        for (int i = 0; i < natureArray.length; ) {
            if (natureArray[i] == null) {
                int j = i + 1;
                for (; j < natureArray.length; ++j) {
                    if (natureArray[j] != null)
                        break;
                }
                List<AtomNode> atomNodeList = quickAtomSegment(charArray, i, j);
                for (AtomNode atomNode : atomNodeList) {
                    // Accept an atom only if it covers at least the word
                    // currently chosen at i; i then jumps past the atom.
                    if (atomNode.sWord.length() >= wordNet[i]) {
                        wordNet[i] = atomNode.sWord.length();
                        natureArray[i] = atomNode.getNature();
                        i += wordNet[i];
                    }
                }
                // NOTE(review): i is reset to j regardless of how far the
                // atoms above advanced it — atoms shorter than wordNet[i]
                // are skipped rather than recorded. Presumably intentional;
                // confirm against quickAtomSegment's output granularity.
                i = j;
            } else {
                ++i;
            }
        }
    }
    // Emit terms by hopping through wordNet; positions left untagged fall
    // back to Nature.nz when tagging is on, or null natures otherwise.
    for (int i = 0; i < wordNet.length; ) {
        Term term = new Term(new String(charArray, i, wordNet[i]), config.speechTagging ? (natureArray[i] == null ? Nature.nz : natureArray[i]) : null);
        term.offset = i;
        termList.add(term);
        i += wordNet[i];
    }
    return termList;
}
Use of com.hankcs.hanlp.corpus.tag.Nature in the project HanLP by hankcs.
The class CoreDictionary, method loadDat.
/**
 * Loads the core dictionary's double-array trie and word attributes from the
 * pre-compiled binary cache on disk.
 *
 * @param path the dictionary path (the {@code Predefine.BIN_EXT} suffix is appended)
 * @return {@code true} when the cache was read completely and the trie loaded;
 *         {@code false} on a missing file, a trailing-bytes mismatch, or any error
 */
static boolean loadDat(String path) {
    try {
        ByteArray byteArray = ByteArray.createByteArray(path + Predefine.BIN_EXT);
        if (byteArray == null) {
            return false;
        }
        final int size = byteArray.nextInt();
        CoreDictionary.Attribute[] attributes = new CoreDictionary.Attribute[size];
        // Natures are serialized as ordinals into this array.
        final Nature[] natureTable = Nature.values();
        for (int i = 0; i < size; ++i) {
            // Per-entry layout: total frequency, nature count, then
            // (nature ordinal, frequency) pairs.
            int totalFrequency = byteArray.nextInt();
            int natureCount = byteArray.nextInt();
            CoreDictionary.Attribute attribute = new CoreDictionary.Attribute(natureCount);
            attribute.totalFrequency = totalFrequency;
            for (int j = 0; j < natureCount; ++j) {
                attribute.nature[j] = natureTable[byteArray.nextInt()];
                attribute.frequency[j] = byteArray.nextInt();
            }
            attributes[i] = attribute;
        }
        // Success requires the trie to load AND the stream to be fully consumed.
        return trie.load(byteArray, attributes) && !byteArray.hasMore();
    } catch (Exception e) {
        logger.warning("读取失败,问题发生在" + e);
        return false;
    }
}
Use of com.hankcs.hanlp.corpus.tag.Nature in the project HanLP by hankcs.
The class CustomNatureUtility, method addNature.
/**
 * Registers an additional part-of-speech tag at runtime, returning the
 * existing tag when one was already created under the same name.
 *
 * @param name the tag's name
 * @return the (possibly newly forged) {@link Nature} value for {@code name}
 */
public static Nature addNature(String name) {
    Nature existing = extraValueMap.get(name);
    if (existing != null) {
        return existing;
    }
    // Forge a new enum constant and register it with the Nature enum.
    Nature created = enumBuster.make(name);
    enumBuster.addByValue(created);
    extraValueMap.put(name, created);
    // The HMM tagging model's transition matrix is sized by the number of
    // natures, so it must be extended to accommodate the new tag.
    CoreDictionaryTransformMatrixDictionary.transformMatrixDictionary.extendSize();
    return created;
}
Aggregations