use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.
the class PersonAttrLibrary method init1.
/**
 * Loads the person dictionary (person.dic) into {@code pnMap}.
 *
 * <p>Each line read from {@code MyStaticValue.getPersonReader()} is split on tab
 * characters. Field 0 is the map key; fields 1 and 2 are parsed as integers and
 * handed to {@code PersonNatureAttr.addFreq}. When the same key appears on several
 * lines, the same {@code PersonNatureAttr} instance receives every {@code addFreq}
 * call.
 *
 * <p>Lines with fewer than three fields are skipped with a warning instead of
 * failing with an {@code ArrayIndexOutOfBoundsException}. A non-numeric field or an
 * I/O error is logged and ends loading; entries stored before the failure remain
 * in {@code pnMap}.
 */
private void init1() {
    try (BufferedReader br = MyStaticValue.getPersonReader()) {
        pnMap = new HashMap<String, PersonNatureAttr>();
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = line.split("\t");
            if (fields.length < 3) {
                // Blank or truncated line: nothing usable, keep reading the rest.
                logger.warn("skipping malformed person.dic line: " + line);
                continue;
            }
            // Reuse the attribute already registered for this key, if any.
            PersonNatureAttr pna = pnMap.get(fields[0]);
            if (pna == null) {
                pna = new PersonNatureAttr();
            }
            pna.addFreq(Integer.parseInt(fields[1]), Integer.parseInt(fields[2]));
            // Stored only after addFreq succeeds, so a bad number never leaves
            // an empty attribute behind in the map.
            pnMap.put(fields[0], pna);
        }
    } catch (NumberFormatException e) {
        logger.warn("数字格式不正确", e);
    } catch (IOException e) {
        logger.warn("IO异常", e);
    }
}
use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.
the class PersonAttrLibrary method init2.
/**
 * Merges the name_freq table into {@code pnMap}.
 *
 * <p>For every entry of {@code MyStaticValue.getPersonFreqMap()} the value is passed
 * to {@code setlocFreq} on the {@code PersonNatureAttr} registered under the same
 * key; a new attribute is created and registered when the key is not yet present.
 */
private void init2() {
    Map<String, int[][]> locationFreqs = MyStaticValue.getPersonFreqMap();
    for (Entry<String, int[][]> item : locationFreqs.entrySet()) {
        PersonNatureAttr attr = pnMap.get(item.getKey());
        boolean isNew = (attr == null);
        if (isNew) {
            attr = new PersonNatureAttr();
        }
        attr.setlocFreq(item.getValue());
        if (isNew) {
            // Register only after the frequencies have been applied.
            pnMap.put(item.getKey(), attr);
        }
    }
}
use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.
the class AsianPersonRecognition method nameFind.
/**
* Person-name recognition: tries to assemble a person name from consecutive
* non-null entries of {@code terms}, starting at {@code offe}.
*
* Side effect: resets the {@code skip} field to false on entry and, when a name is
* returned, sets it to true only if none of the consumed terms had
* {@code personAttr.allFreq > 0}.
*
* @param offe index in {@code terms} where scanning starts; also passed as the
*        second constructor argument of the returned Term
* @param beginFreq value whose natural log is subtracted from the score
*        (presumably the "name start" frequency of the preceding context — confirm
*        against the caller)
* @param size passed to {@code PersonNatureAttr.getFreq(size, index)} and used to
*        index {@code FACTORY}; scanning stops once {@code size + 2} terms have been
*        consumed
* @return a new Term with nature {@code TermNatures.NR} carrying the computed score,
*         or null when the candidate is rejected
*/
private Term nameFind(int offe, int beginFreq, int size) {
StringBuilder sb = new StringBuilder();
// Number of consumed terms whose personAttr.allFreq is positive.
int undefinite = 0;
skip = false;
PersonNatureAttr pna = null;
// Position of the current term inside the candidate name.
int index = 0;
int freq = 0;
// Running sum of log(term frequency + 1) - log(positional name frequency).
double allFreq = 0;
Term term = null;
int i = offe;
for (; i < terms.length; i++) {
// Running off the end of the array still yields a name candidate.
if (terms[i] == null) {
continue;
}
term = terms[i];
pna = term.termNatures().personAttr;
// Frequency of this term at this position for this name length; zero means the
// candidate is impossible, so give up immediately.
if ((freq = pna.getFreq(size, index)) == 0) {
return null;
}
if (pna.allFreq > 0) {
undefinite++;
}
sb.append(term.getName());
allFreq += Math.log(term.termNatures().allFreq + 1);
allFreq += -Math.log((freq));
index++;
if (index == size + 2) {
break;
}
}
double score = -Math.log(FACTORY[size]);
score += allFreq;
double endFreq = 0;
// Look for the terminating word: the first non-null term after the name.
boolean flag = true;
while (flag) {
i++;
if (i >= terms.length) {
// No following term: fall back to a fixed end frequency.
endFreq = 10;
flag = false;
} else if (terms[i] != null) {
// Reject when the last name term and the following term co-occur more than 3 times.
int twoWordFreq = NgramLibrary.getTwoWordFreq(term, terms[i]);
if (twoWordFreq > 3) {
return null;
}
// +1 keeps the argument of Math.log below strictly positive when end is 0.
endFreq = terms[i].termNatures().personAttr.end + 1;
flag = false;
}
}
score -= Math.log(endFreq);
score -= Math.log(beginFreq);
// Only sufficiently negative scores are accepted.
if (score > -3) {
return null;
}
if (allFreq > 0 && undefinite > 0) {
return null;
}
skip = undefinite == 0;
term = new Term(sb.toString(), offe, TermNatures.NR);
term.selfScore(score);
return term;
}
use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.
the class DATDictionary method personNameFull.
/**
 * Attaches person-name attributes from {@code PersonAttrLibrary} to the matching
 * items of the given double-array trie.
 *
 * <p>For a single-character key whose slot in {@code dat.getDAT()} is empty, a new
 * {@code AnsjItem} (base 0, check -1, status 3) is created and stored in that slot.
 * In every other case the item is looked up with {@code dat.getItem}; keys with no
 * item are skipped. An item without {@code termNatures} receives
 * {@code TermNatures.NULL} when the key is one character below code point 256,
 * otherwise a new {@code TermNatures(TermNature.NR)}.
 *
 * @param dat trie whose items are updated in place
 * @throws NumberFormatException declared by the signature; not thrown directly here
 * @throws IOException declared by the signature; not thrown directly here
 */
private static void personNameFull(DoubleArrayTire dat) throws NumberFormatException, IOException {
    HashMap<String, PersonNatureAttr> persons = new PersonAttrLibrary().getPersonMap();
    final char zeroBase = 0;
    // Backfill person-name natures.
    for (Entry<String, PersonNatureAttr> person : persons.entrySet()) {
        String word = person.getKey();
        AnsjItem item;
        if (word.length() == 1) {
            item = (AnsjItem) dat.getDAT()[word.charAt(0)];
            if (item == null) {
                item = new AnsjItem();
                item.setBase(zeroBase);
                item.setCheck(-1);
                item.setStatus((byte) 3);
                item.setName(word);
                dat.getDAT()[word.charAt(0)] = item;
            } else {
                // Slot already occupied: resolve through the regular lookup.
                item = dat.getItem(word);
            }
        } else {
            item = dat.getItem(word);
        }
        if (item == null) {
            continue;
        }
        if (item.termNatures == null) {
            boolean singleLowChar = word.length() == 1 && word.charAt(0) < 256;
            item.termNatures = singleLowChar ? TermNatures.NULL : new TermNatures(TermNature.NR);
        }
        item.termNatures.setPersonNatureAttr(person.getValue());
    }
}
Aggregations