use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
the class PlaceRecognition method roleTag.
/**
 * Assigns a place-name role item to every vertex, in input order.
 *
 * <p>A vertex whose nature is {@code Nature.ns} and whose total frequency is at most 1000
 * gets a fixed role set; every other vertex is looked up in {@code PlaceDictionary} by its
 * equivalent word and falls back to role {@code NS.Z} when the dictionary has no entry.
 *
 * @param vertexList the vertices to tag
 * @param wordNetAll not used by this method
 * @return one {@code EnumItem<NS>} per vertex, in the same order as {@code vertexList}
 */
public static List<EnumItem<NS>> roleTag(List<Vertex> vertexList, WordNet wordNetAll) {
    final List<EnumItem<NS>> roles = new LinkedList<EnumItem<NS>>();
    for (Vertex current : vertexList) {
        EnumItem<NS> role;
        if (Nature.ns == current.getNature() && current.getAttribute().totalFrequency <= 1000) {
            if (current.realWord.length() < 3) {
                // Two-character place name: assume it may still take one more suffix or prefix.
                role = new EnumItem<NS>(NS.H, NS.G);
            } else {
                // Otherwise only a suffix may be appended.
                role = new EnumItem<NS>(NS.G);
            }
        } else {
            // Look up by the equivalent word here, which is more precise.
            role = PlaceDictionary.dictionary.get(current.word);
            if (role == null) {
                role = new EnumItem<NS>(NS.Z, PlaceDictionary.transformMatrixDictionary.getTotalFrequency(NS.Z));
            }
        }
        roles.add(role);
    }
    return roles;
}
use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
the class OrganizationRecognition method roleTag.
/**
 * Assigns an organization-name role item to every vertex, in input order.
 *
 * <p>Certain guessed natures map directly to a fixed role with frequency 1000; everything
 * else is looked up in {@code OrganizationDictionary} by its equivalent word and falls back
 * to role {@code NT.Z} when the dictionary has no entry.
 *
 * @param vertexList the vertices to tag
 * @param wordNetAll not used by this method
 * @return one {@code EnumItem<NT>} per vertex, in the same order as {@code vertexList}
 */
public static List<EnumItem<NT>> roleTag(List<Vertex> vertexList, WordNet wordNetAll) {
    final List<EnumItem<NT>> roles = new LinkedList<EnumItem<NT>>();
    for (Vertex current : vertexList) {
        // Stays null when no nature-based rule applies, which triggers the dictionary lookup below.
        EnumItem<NT> role = null;
        switch (current.guessNature()) {
            case nrf:
                // Only low-frequency nrf words get the fixed F role; frequent ones use the dictionary.
                if (current.getAttribute().totalFrequency <= 1000) {
                    role = new EnumItem<NT>(NT.F, 1000);
                }
                break;
            case ni:
            case nic:
            case nis:
            case nit:
                role = new EnumItem<NT>(NT.K, 1000);
                role.addLabel(NT.D, 1000);
                break;
            case m:
                role = new EnumItem<NT>(NT.M, 1000);
                break;
            default:
                break;
        }
        if (role == null) {
            // Look up by the equivalent word here, which is more precise.
            role = OrganizationDictionary.dictionary.get(current.word);
            if (role == null) {
                role = new EnumItem<NT>(NT.Z, OrganizationDictionary.transformMatrixDictionary.getTotalFrequency(NT.Z));
            }
        }
        roles.add(role);
    }
    return roles;
}
use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
the class CRFSegment method toTermList.
/**
 * Converts a path of vertices into the final list of terms.
 *
 * <p>Each vertex is passed through {@code convert}. When offsets are enabled, each term's
 * {@code offset} is set to the summed {@code length()} of all terms before it.
 *
 * @param vertexList    the path to convert; must not be null (checked by an assertion)
 * @param offsetEnabled whether to compute the offset of each term
 * @return the converted terms, in the same order as {@code vertexList}
 */
protected static List<Term> toTermList(List<Vertex> vertexList, boolean offsetEnabled) {
    assert vertexList != null;
    List<Term> terms = new ArrayList<Term>(vertexList.size());
    int runningOffset = 0;
    for (Vertex vertex : vertexList) {
        Term term = convert(vertex);
        if (offsetEnabled) {
            term.offset = runningOffset;
            runningOffset += term.length();
        }
        terms.add(term);
    }
    return terms;
}
use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
the class PersonRecognition method roleObserve.
/**
 * Role observation: loads the roles of every word from the person dictionary and
 * supplements missing entries with a few rules based on the word's guessed nature.
 *
 * @param wordSegResult the coarse segmentation result
 * @return one {@code EnumItem<NR>} per vertex, in the same order as {@code wordSegResult}
 */
public static List<EnumItem<NR>> roleObserve(List<Vertex> wordSegResult) {
    List<EnumItem<NR>> roles = new LinkedList<EnumItem<NR>>();
    for (Vertex vertex : wordSegResult) {
        EnumItem<NR> role = PersonDictionary.dictionary.get(vertex.realWord);
        if (role == null) {
            switch (vertex.guessNature()) {
                case nr:
                    // Some two-character names can actually form a longer three-character name.
                    if (vertex.getAttribute().totalFrequency <= 1000 && vertex.realWord.length() == 2) {
                        role = new EnumItem<NR>(NR.X, NR.G);
                    }
                    break;
                case nnt:
                    // Surname + job title.
                    role = new EnumItem<NR>(NR.G, NR.K);
                    break;
                default:
                    break;
            }
            if (role == null) {
                // No rule matched: fall back to role A with its total frequency.
                role = new EnumItem<NR>(NR.A, PersonDictionary.transformMatrixDictionary.getTotalFrequency(NR.A));
            }
        }
        roles.add(role);
    }
    return roles;
}
use of com.hankcs.hanlp.seg.common.Vertex in project HanLP by hankcs.
the class PersonRecognition method Recognition.
/**
 * Runs person-name recognition over a segmentation result.
 *
 * <p>Observes roles for each vertex, decodes the role sequence with
 * {@code viterbiComputeSimply}, and passes the result to
 * {@code PersonDictionary.parsePattern}. When {@code HanLP.Config.DEBUG} is set, the
 * observed and decoded roles are printed to standard output.
 *
 * @param pWordSegResult the segmentation result to analyse
 * @param wordNetOptimum forwarded to {@code PersonDictionary.parsePattern}
 * @param wordNetAll     forwarded to {@code PersonDictionary.parsePattern}
 * @return always {@code true}
 */
public static boolean Recognition(List<Vertex> pWordSegResult, WordNet wordNetOptimum, WordNet wordNetAll) {
    final List<EnumItem<NR>> observedRoles = roleObserve(pWordSegResult);
    if (HanLP.Config.DEBUG) {
        StringBuilder observed = new StringBuilder();
        Iterator<Vertex> vertexIt = pWordSegResult.iterator();
        Iterator<EnumItem<NR>> roleIt = observedRoles.iterator();
        // The role list drives the loop; the vertex iterator is advanced alongside it.
        while (roleIt.hasNext()) {
            EnumItem<NR> role = roleIt.next();
            observed.append('[').append(vertexIt.next().realWord).append(' ').append(role).append(']');
        }
        System.out.printf("人名角色观察:%s\n", observed.toString());
    }
    final List<NR> decodedRoles = viterbiComputeSimply(observedRoles);
    if (HanLP.Config.DEBUG) {
        StringBuilder tagged = new StringBuilder();
        Iterator<Vertex> vertexIt = pWordSegResult.iterator();
        Iterator<NR> tagIt = decodedRoles.iterator();
        tagged.append('[');
        while (tagIt.hasNext()) {
            NR tag = tagIt.next();
            tagged.append(vertexIt.next().realWord).append('/').append(tag).append(" ,");
        }
        // Strip the trailing " ," separator left by the last entry, if any entry was written.
        int taggedLength = tagged.length();
        if (taggedLength > 1) {
            tagged.setLength(taggedLength - 2);
        }
        tagged.append(']');
        System.out.printf("人名角色标注:%s\n", tagged.toString());
    }
    PersonDictionary.parsePattern(decodedRoles, pWordSegResult, wordNetOptimum, wordNetAll);
    return true;
}
Aggregations