Search in sources :

Example 1 with PersonNatureAttr

use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.

the class PersonAttrLibrary method init1.

/**
 * Rebuilds {@code pnMap} from the reader returned by
 * {@code MyStaticValue.getPersonReader()} (the person.dic source).
 *
 * <p>Each line is split on tabs: field 0 is the map key, fields 1 and 2 are
 * parsed as integers and passed to {@code PersonNatureAttr.addFreq}. When a key
 * occurs on several lines, {@code addFreq} is called again on the same
 * attribute object. Lines with fewer than three fields are logged and skipped.
 *
 * <p>A non-numeric field or an I/O failure is logged and stops the load;
 * entries stored before the failure remain in {@code pnMap}.
 */
private void init1() {
    try (BufferedReader br = MyStaticValue.getPersonReader()) {
        pnMap = new HashMap<String, PersonNatureAttr>();
        String line;
        while ((line = br.readLine()) != null) {
            String[] fields = line.split("\t");
            if (fields.length < 3) {
                // A blank or truncated line would otherwise fail with an
                // uncaught ArrayIndexOutOfBoundsException.
                logger.warn("skipping malformed person.dic line: " + line);
                continue;
            }
            // Parse before touching the map so a bad number never leaves a
            // half-initialised entry behind.
            int first = Integer.parseInt(fields[1]);
            int second = Integer.parseInt(fields[2]);
            PersonNatureAttr pna = pnMap.get(fields[0]);
            if (pna == null) {
                pna = new PersonNatureAttr();
                pnMap.put(fields[0], pna);
            }
            pna.addFreq(first, second);
        }
    } catch (NumberFormatException e) {
        logger.warn("数字格式不正确", e);
    } catch (IOException e) {
        logger.warn("IO异常", e);
    }
}
Also used : PersonNatureAttr(org.ansj.domain.PersonNatureAttr) BufferedReader(java.io.BufferedReader) IOException(java.io.IOException)

Example 2 with PersonNatureAttr

use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.

the class PersonAttrLibrary method init2.

/**
 * Applies the location-frequency tables from
 * {@code MyStaticValue.getPersonFreqMap()} (the name_freq source) to
 * {@code pnMap}.
 *
 * <p>For every entry, the attribute already stored under the same key receives
 * the table via {@code setlocFreq}; when no attribute exists yet, a new one is
 * created, given the table, and then stored.
 */
private void init2() {
    for (Entry<String, int[][]> freqEntry : MyStaticValue.getPersonFreqMap().entrySet()) {
        String key = freqEntry.getKey();
        PersonNatureAttr known = pnMap.get(key);
        PersonNatureAttr target = (known != null) ? known : new PersonNatureAttr();
        target.setlocFreq(freqEntry.getValue());
        if (known == null) {
            // Only freshly created attributes need to be registered.
            pnMap.put(key, target);
        }
    }
}
Also used : Entry(java.util.Map.Entry) PersonNatureAttr(org.ansj.domain.PersonNatureAttr)

Example 3 with PersonNatureAttr

use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.

the class AsianPersonRecognition method nameFind.

/**
 * Person-name recognition: tries to assemble one name from consecutive
 * non-null entries of {@code terms}, beginning at {@code offe}.
 *
 * <p>Resets the {@code skip} field to {@code false} on entry; on success it is
 * set to {@code true} only when none of the collected terms had a positive
 * {@code personAttr.allFreq}.
 *
 * @param offe      index in {@code terms} where scanning starts; also used as
 *                  the offset of the returned term
 * @param beginFreq value whose natural logarithm is subtracted from the score
 * @param size      passed to {@code PersonNatureAttr.getFreq} and used to index
 *                  {@code FACTORY}; collection stops after {@code size + 2}
 *                  non-null terms
 * @return a new term with nature {@code TermNatures.NR} carrying the computed
 *         score, or {@code null} when the candidate is rejected
 */
private Term nameFind(int offe, int beginFreq, int size) {
    skip = false;

    StringBuilder nameText = new StringBuilder();
    int termsWithAllFreq = 0;
    int position = 0;
    double logSum = 0;
    Term lastTerm = null;

    int cursor = offe;
    for (; cursor < terms.length; cursor++) {
        Term current = terms[cursor];
        if (current == null) {
            continue;
        }
        lastTerm = current;
        PersonNatureAttr attr = current.termNatures().personAttr;

        // Frequency for this name length at this position; zero means the
        // candidate is impossible, so give up immediately.
        int freq = attr.getFreq(size, position);
        if (freq == 0) {
            return null;
        }
        if (attr.allFreq > 0) {
            termsWithAllFreq++;
        }

        nameText.append(current.getName());
        // Kept as two separate updates so floating-point rounding is unchanged.
        logSum += Math.log(current.termNatures().allFreq + 1);
        logSum -= Math.log(freq);

        position++;
        if (position == size + 2) {
            break;
        }
    }

    double score = -Math.log(FACTORY[size]);
    score += logSum;

    // Look for the first non-null term after the name (the ending word).
    double endFreq = 0;
    for (;;) {
        cursor++;
        if (cursor >= terms.length) {
            // Ran off the end of the term array without finding a follower.
            endFreq = 10;
            break;
        }
        Term following = terms[cursor];
        if (following == null) {
            continue;
        }
        if (NgramLibrary.getTwoWordFreq(lastTerm, following) > 3) {
            return null;
        }
        endFreq = following.termNatures().personAttr.end + 1;
        break;
    }

    score -= Math.log(endFreq);
    score -= Math.log(beginFreq);

    if (score > -3) {
        return null;
    }
    if (logSum > 0 && termsWithAllFreq > 0) {
        return null;
    }

    skip = termsWithAllFreq == 0;
    Term recognized = new Term(nameText.toString(), offe, TermNatures.NR);
    recognized.selfScore(score);
    return recognized;
}
Also used : PersonNatureAttr(org.ansj.domain.PersonNatureAttr) Term(org.ansj.domain.Term)

Example 4 with PersonNatureAttr

use of org.ansj.domain.PersonNatureAttr in project ansj_seg by NLPchina.

the class DATDictionary method personNameFull.

/**
 * Attaches every entry of the person map built by {@code PersonAttrLibrary} to
 * the matching item of the given double-array trie.
 *
 * <p>A single-character key whose slot in {@code dat.getDAT()} is empty gets a
 * freshly created item stored in that slot; every other key is looked up with
 * {@code dat.getItem}. Keys that resolve to no item are skipped. Items without
 * term natures receive a default before the person attribute is set.
 *
 * @param dat the trie whose items are updated in place
 * @throws NumberFormatException declared by the original signature
 * @throws IOException           declared by the original signature
 */
private static void personNameFull(DoubleArrayTire dat) throws NumberFormatException, IOException {
    HashMap<String, PersonNatureAttr> personMap = new PersonAttrLibrary().getPersonMap();
    final char initialBase = 0;

    // Fill in person-name natures for dictionary items.
    for (Entry<String, PersonNatureAttr> personEntry : personMap.entrySet()) {
        String word = personEntry.getKey();
        boolean singleChar = word.length() == 1;

        AnsjItem item;
        if (singleChar && ((AnsjItem) dat.getDAT()[word.charAt(0)]) == null) {
            item = new AnsjItem();
            item.setBase(initialBase);
            item.setCheck(-1);
            item.setStatus((byte) 3);
            item.setName(word);
            dat.getDAT()[word.charAt(0)] = item;
        } else {
            item = dat.getItem(word);
        }

        if (item == null) {
            continue;
        }

        if (item.termNatures == null) {
            boolean lowSingleChar = singleChar && word.charAt(0) < 256;
            item.termNatures = lowSingleChar ? TermNatures.NULL : new TermNatures(TermNature.NR);
        }
        item.termNatures.setPersonNatureAttr(personEntry.getValue());
    }
}
Also used : Entry(java.util.Map.Entry) TermNatures(org.ansj.domain.TermNatures) PersonNatureAttr(org.ansj.domain.PersonNatureAttr) AnsjItem(org.ansj.domain.AnsjItem) PersonAttrLibrary(org.ansj.library.name.PersonAttrLibrary)

Aggregations

PersonNatureAttr (org.ansj.domain.PersonNatureAttr): 4, Entry (java.util.Map.Entry): 2, BufferedReader (java.io.BufferedReader): 1, IOException (java.io.IOException): 1, AnsjItem (org.ansj.domain.AnsjItem): 1, Term (org.ansj.domain.Term): 1, TermNatures (org.ansj.domain.TermNatures): 1, PersonAttrLibrary (org.ansj.library.name.PersonAttrLibrary): 1