Search in sources :

Example 1 with AnsjItem

use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.

the class MyStaticValue method initBigramTables.

/**
 * Loads the word-to-word association (bigram) table from {@code bigramdict.dic}.
 *
 * <p>Every non-blank line is read as {@code from@to<TAB>freq}. Both words are
 * looked up through {@link DATDictionary#getItem}; a word that is not found but
 * contains {@code #} is mapped to {@link AnsjItem#BEGIN} (first word) or
 * {@link AnsjItem#END} (second word). Pairs in which either side is still
 * {@link AnsjItem#NULL} are skipped. The frequency is stored in the first
 * item's {@code bigramEntryMap}, keyed by the second item's index.
 *
 * <p>Lines that do not have the {@code from@to<TAB>freq} shape (no tab, or no
 * second word after {@code @}) are skipped instead of raising an
 * {@link ArrayIndexOutOfBoundsException}. A non-numeric frequency still stops
 * the load and is logged as a warning, as are I/O and encoding failures.
 */
public static void initBigramTables() {
    try (BufferedReader reader = IOUtil.getReader(DicReader.getInputStream("bigramdict.dic"), "UTF-8")) {
        String line;
        while ((line = reader.readLine()) != null) {
            if (StringUtil.isBlank(line)) {
                continue;
            }
            String[] fields = line.split("\t");
            if (fields.length < 2) {
                // No frequency column: fields[1] would be out of bounds.
                continue;
            }
            int freq = Integer.parseInt(fields[1]);
            String[] words = fields[0].split("@");
            if (words.length < 2) {
                // String.split drops trailing empty strings, so "a@" or "a" yields one element.
                continue;
            }
            AnsjItem fromItem = DATDictionary.getItem(words[0]);
            AnsjItem toItem = DATDictionary.getItem(words[1]);
            // Words missing from the dictionary that contain '#' stand for the boundary items.
            if (fromItem == AnsjItem.NULL && words[0].contains("#")) {
                fromItem = AnsjItem.BEGIN;
            }
            if (toItem == AnsjItem.NULL && words[1].contains("#")) {
                toItem = AnsjItem.END;
            }
            if (fromItem == AnsjItem.NULL || toItem == AnsjItem.NULL) {
                continue;
            }
            // The per-item map is created lazily on the first association.
            if (fromItem.bigramEntryMap == null) {
                fromItem.bigramEntryMap = new HashMap<Integer, Integer>();
            }
            fromItem.bigramEntryMap.put(toItem.getIndex(), freq);
        }
    } catch (NumberFormatException e) {
        LOG.warn("数字格式异常", e);
    } catch (UnsupportedEncodingException e) {
        LOG.warn("不支持的编码", e);
    } catch (IOException e) {
        LOG.warn("IO异常", e);
    }
}
Also used : BufferedReader(java.io.BufferedReader) AnsjItem(org.ansj.domain.AnsjItem) UnsupportedEncodingException(java.io.UnsupportedEncodingException) IOException(java.io.IOException)

Example 2 with AnsjItem

use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.

the class NatureRecognition method getTermNatures.

/**
 * Resolves the part-of-speech information ({@link TermNatures}) for a single word.
 *
 * <p>Sources are consulted in order and the first match wins: the
 * {@link DATDictionary} entry, then the parameters returned by
 * {@code getParams(word)} (presumably the user-defined dictionary — confirm
 * against {@code getParams}), then the English and number checks of
 * {@link WordAlert}.
 *
 * @param word the word to look up
 * @return the natures found for the word, or {@link TermNatures#NULL} when no source matches
 */
public TermNatures getTermNatures(String word) {
    // System dictionary takes precedence.
    AnsjItem dictItem = DATDictionary.getItem(word);
    if (dictItem != AnsjItem.NULL) {
        return dictItem.termNatures;
    }

    // Next, the parameters supplied by getParams; the first one names the nature.
    String[] wordParams = getParams(word);
    if (wordParams != null) {
        return new TermNatures(new TermNature(wordParams[0], 1));
    }

    // Finally fall back to character-class based defaults.
    if (WordAlert.isEnglish(word)) {
        return TermNatures.EN;
    }
    if (WordAlert.isNumber(word)) {
        return TermNatures.M;
    }
    return TermNatures.NULL;
}
Also used : TermNatures(org.ansj.domain.TermNatures) AnsjItem(org.ansj.domain.AnsjItem) TermNature(org.ansj.domain.TermNature)

Example 3 with AnsjItem

use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.

the class CoreLibraryMaker method insertToArray.

/**
 * Creates an {@link AnsjItem} for a single character and stores it in the
 * array slot addressed by that character's code.
 *
 * @param dat    the item array to write into; slot {@code dat[c]} is overwritten
 * @param c      the character; used as the item's name, its index and the array position
 * @param status the status byte to set on the item
 * @param param  the value assigned to the item's {@code param} field
 */
private static void insertToArray(Item[] dat, char c, byte status, String param) {
    final String singleCharName = String.valueOf(c);

    AnsjItem charItem = new AnsjItem();
    charItem.setName(singleCharName);
    charItem.setIndex(c);
    // The check value is fixed at -1 for these items.
    charItem.setCheck(-1);
    charItem.setStatus(status);
    charItem.param = param;

    dat[c] = charItem;
}
Also used : AnsjItem(org.ansj.domain.AnsjItem)

Example 4 with AnsjItem

use of org.ansj.domain.AnsjItem in project ansj_seg by NLPchina.

the class DATDictionary method personNameFull.

/**
 * Attaches person-name attributes from {@link PersonAttrLibrary} to the items of the
 * given double-array trie.
 *
 * <p>For a one-character key whose slot in {@code dat.getDAT()} is empty, a new
 * {@link AnsjItem} is created and stored in that slot. In every other case the item
 * is obtained through {@code dat.getItem(key)}, and keys for which that returns
 * {@code null} are skipped. An item without {@code termNatures} receives
 * {@link TermNatures#NULL} when the key is a single character below code 256, and a
 * new {@code TermNatures(TermNature.NR)} otherwise. Finally the person attribute is
 * set on the item's {@code termNatures}.
 *
 * @param dat the trie whose items are updated in place
 * @throws NumberFormatException declared by the signature; not thrown directly by this body
 * @throws IOException declared by the signature; not thrown directly by this body
 */
private static void personNameFull(DoubleArrayTire dat) throws NumberFormatException, IOException {
    HashMap<String, PersonNatureAttr> attrsByName = new PersonAttrLibrary().getPersonMap();
    // Base value given to newly created single-character items.
    final char initialBase = 0;

    for (Entry<String, PersonNatureAttr> personEntry : attrsByName.entrySet()) {
        String name = personEntry.getKey();
        boolean singleChar = name.length() == 1;

        // Only single-character keys are looked up directly in the raw array.
        AnsjItem item = singleChar ? (AnsjItem) dat.getDAT()[name.charAt(0)] : null;
        if (singleChar && item == null) {
            item = new AnsjItem();
            item.setBase(initialBase);
            item.setCheck(-1);
            item.setStatus((byte) 3);
            item.setName(name);
            dat.getDAT()[name.charAt(0)] = item;
        } else {
            item = dat.getItem(name);
        }

        if (item == null) {
            continue;
        }

        if (item.termNatures == null) {
            item.termNatures = (singleChar && name.charAt(0) < 256)
                    ? TermNatures.NULL
                    : new TermNatures(TermNature.NR);
        }
        item.termNatures.setPersonNatureAttr(personEntry.getValue());
    }
}
Also used : Entry(java.util.Map.Entry) TermNatures(org.ansj.domain.TermNatures) PersonNatureAttr(org.ansj.domain.PersonNatureAttr) AnsjItem(org.ansj.domain.AnsjItem) PersonAttrLibrary(org.ansj.library.name.PersonAttrLibrary)

Aggregations

AnsjItem (org.ansj.domain.AnsjItem): 4, TermNatures (org.ansj.domain.TermNatures): 2, BufferedReader (java.io.BufferedReader): 1, IOException (java.io.IOException): 1, UnsupportedEncodingException (java.io.UnsupportedEncodingException): 1, Entry (java.util.Map.Entry): 1, PersonNatureAttr (org.ansj.domain.PersonNatureAttr): 1, TermNature (org.ansj.domain.TermNature): 1, PersonAttrLibrary (org.ansj.library.name.PersonAttrLibrary): 1